score:3

Accepted answer

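A bare `None` has the type `None.type`, so Spark cannot infer a string column from it. Ascribe the type explicitly by writing `None: Option[String]` instead of `None`: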

scala> val maybeString = None: Option[String]
maybeString: Option[String] = None

scala> val sampleData = spark.createDataset(Seq(
     |   (1, Some("Yes"), maybeString),
     |   (2, maybeString, maybeString),
     |   (3, Some("Okay"), maybeString),
     |   (4, maybeString, maybeString))).toDF("id", "title", "value")
sampleData: org.apache.spark.sql.DataFrame = [id: int, title: string ... 1 more field]

scala> sampleData.show
+---+-----+-----+
| id|title|value|
+---+-----+-----+
|  1|  Yes| null|
|  2| null| null|
|  3| Okay| null|
|  4| null| null|
+---+-----+-----+

score:1

Alternatively, if you are only dealing with strings, you can use `null.asInstanceOf[String]`:

scala> val df1 = sc.parallelize(Seq((1, "Yes", null.asInstanceOf[String]),
     | (2, null.asInstanceOf[String], null.asInstanceOf[String]),
     | (3, "Okay", null.asInstanceOf[String]),
     | (4, null.asInstanceOf[String], null.asInstanceOf[String]))).toDF("id", "title", "value")
df1: org.apache.spark.sql.DataFrame = [id: int, title: string, value: string]

scala> df1.show
+---+-----+-----+
| id|title|value|
+---+-----+-----+
|  1|  Yes| null|
|  2| null| null|
|  3| Okay| null|
|  4| null| null|
+---+-----+-----+

Related Query

More Query from same tag