score:0

Found a workaround; see if there is a better solution using a single DataFrame and no UDF.

import org.apache.spark.sql.functions.{regexp_replace, to_date}

// Sample data: dates stored as month/day/year strings where month and day
// may or may not carry a leading zero.
val df = spark.createDataFrame(Seq(
  (1, "9/11/2020"),
  (2, "10/11/2020"),
  (3, "1/1/2020"),
  (4, "12/7/2020"))).toDF("id", "x4")

// Left-pad every stand-alone single digit with a zero ("9/11/2020" -> "09/11/2020",
// "1/1/2020" -> "01/01/2020"). The word boundaries on both sides keep the regex
// from touching digits that belong to a longer number such as "10" or "2020",
// so one pass covers month and day and no intermediate columns are needed.
val paddedX4 = regexp_replace(df("x4"), "\\b(\\d)\\b", "0$1")

// Pattern letters are case-sensitive: "MM" is month-of-year, "mm" is minute-of-hour.
val formateddatadf = df.withColumn("date", to_date(paddedX4, "MM/dd/yyyy"))

formateddatadf.printSchema()
formateddatadf.show()

The output looks as follows:

root
 |-- id: integer (nullable = false)
 |-- x4: string (nullable = true)
 |-- date: date (nullable = true)

+---+----------+----------+
| id|        x4|      date|
+---+----------+----------+
|  1| 9/11/2020|2020-09-11|
|  2|10/11/2020|2020-10-11|
|  3|  1/1/2020|2020-01-01|
|  4| 12/7/2020|2020-12-07|
+---+----------+----------+

score:2

Use the built-in functions: `from_unixtime` with `unix_timestamp`, or `date_format` with `to_timestamp` or `to_date`.

Example (in Spark 2.4):

import org.apache.spark.sql.functions._

// Sample data: month/day/year strings, month and day not always zero-padded.
val df = spark.createDataFrame(Seq(
  (1, "9/11/2020"),
  (2, "10/11/2020"),
  (3, "1/1/2020"),
  (4, "12/7/2020"))).toDF("id", "x4")

// Pattern letters are case-sensitive: "M" is month-of-year, "m" is minute-of-hour.
// Parse with single-letter "M/d/yyyy" so both padded and un-padded input is accepted
// (the stricter parser in Spark 3+ rejects "9/11/2020" against "MM/dd/yyyy");
// format with "MM/dd/yyyy" to get the zero-padded output.

// Using from_unixtime
df.withColumn("date", from_unixtime(unix_timestamp(col("x4"), "M/d/yyyy"), "MM/dd/yyyy")).show()

// Using date_format
df.withColumn("date", date_format(to_timestamp(col("x4"), "M/d/yyyy"), "MM/dd/yyyy")).show()
df.withColumn("date", date_format(to_date(col("x4"), "M/d/yyyy"), "MM/dd/yyyy")).show()
//+---+----------+----------+
//| id|        x4|      date|
//+---+----------+----------+
//|  1| 9/11/2020|09/11/2020|
//|  2|10/11/2020|10/11/2020|
//|  3|  1/1/2020|01/01/2020|
//|  4| 12/7/2020|12/07/2020|
//+---+----------+----------+

Related Query

More Query from same tag