Flatten a Spark DataFrame in Scala
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{Column, DataFrame}
class SparkFlattener {

  /**
   * Recursively collects one [[Column]] per leaf field of a (possibly nested) schema.
   *
   * @param schema      the struct whose fields are walked
   * @param path        dot-separated path from the root to `schema`, or `None` at the root
   * @param aliasPrefix camel-cased alias accumulated so far, or `None` at the root
   * @return fully-qualified columns, each aliased with the camel-cased flat name
   */
  private def flatten(schema: StructType,
                      path: Option[String] = None,
                      aliasPrefix: Option[String] = None): Array[Column] =
    schema.fields.flatMap { f =>
      // NOTE(review): field names containing '.' would need backtick-quoting here —
      // assumed not to occur in the schemas this is used on; confirm if that changes.
      val colName  = path.fold(f.name)(p => s"$p.${f.name}")
      val colAlias = aliasPrefix.fold(f.name)(p => s"$p${f.name.capitalize}")
      f.dataType match {
        // Descend into nested structs, threading the accumulated path and alias.
        case st: StructType => flatten(st, Some(colName), Some(colAlias))
        // Leaf field (including arrays/maps): select it under its flat alias.
        case _              => Array(col(colName).as(colAlias))
      }
    }

  /**
   * Flattens a DataFrame, i.e. transforms nested struct fields into top-level
   * columns whose names camel-case the original path (e.g. `a.b` becomes `aB`).
   * Array and map columns are kept as-is; they cannot be flattened this way.
   *
   * @param df the DataFrame to flatten
   * @return a DataFrame with one column per leaf field of `df`'s schema
   */
  def flatten(df: DataFrame): DataFrame =
    df.select(flatten(df.schema): _*)
}
To contact me, send an email anytime or leave a comment below.
