
Accepted answer

You have some spelling errors, and the `count` function at the end cannot be resolved as written; you need to import org.apache.spark.sql.functions._

Check this; I corrected the spelling errors and added the imports:

import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.spark.sql.functions._

/**
 * Created by anquegi on 01/06/15.
 */
object QSpark162015 extends App {

  val conf = new SparkConf()
    .setMaster("local[2]")
    .setAppName("QSpark162015")
    .set("spark.executor.memory", "2g")

  val sc = new SparkContext(conf)

  val sqlContext = new org.apache.spark.sql.SQLContext(sc)

  import sqlContext.implicits._

  val df = sc.parallelize(Array((1, 30), (2, 10), (3, 20), (1, 10), (2, 30))).toDF("books", "readers")
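  // self-join on readers: pair each book with every other book read by the
  // same reader; books < r_books keeps each unordered pair only once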
  val results = df.join(
    df.select($"books" as "r_books", $"readers" as "r_readers"),
    $"readers" === $"r_readers" and $"books" < $"r_books"
  )
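    // count how many readers each pair of books has in common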
    .groupBy($"books", $"r_books")
    .agg($"books", $"r_books", count($"readers"))

  results.foreach(println _)

  sc.stop()

}
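
By the way, if you are on Spark 2.x or later, SparkSession replaces the SparkContext/SQLContext pair, and grouping columns are retained automatically, so you no longer need to repeat them in agg. A minimal sketch of the same query (the object name QSpark162015Modern is just for illustration):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._

object QSpark162015Modern extends App {

  // SparkSession replaces SparkContext + SQLContext in Spark 2.x
  val spark = SparkSession.builder()
    .master("local[2]")
    .appName("QSpark162015")
    .config("spark.executor.memory", "2g")
    .getOrCreate()

  import spark.implicits._

  val df = Seq((1, 30), (2, 10), (3, 20), (1, 10), (2, 30)).toDF("books", "readers")

  val results = df.join(
      df.select($"books" as "r_books", $"readers" as "r_readers"),
      $"readers" === $"r_readers" && $"books" < $"r_books"
    )
    .groupBy($"books", $"r_books")
    // grouping columns are kept automatically, so agg only needs the count
    .agg(count($"readers") as "shared_readers")

  results.show()

  spark.stop()
}

With this sample data it should print a single row: books 1 and 2 share two readers (10 and 30).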
