Working with Spark from RStudio

These instructions apply to RStudio hosted on Databricks.

# Connect to Databricks from this server

# Load SparkR and start (or attach to) the Spark session managed by Databricks.
library(SparkR)
SparkR::sparkR.session()

# Load sparklyr and connect to the same Databricks-managed cluster;
# `sc` is the connection handle used by all subsequent snippets.
# NOTE(review): SparkR and sparklyr/dplyr export overlapping names
# (e.g. collect, filter) — load order determines which masks which;
# confirm the intended precedence before mixing the two APIs.
library(sparklyr)
sc <- spark_connect(method = "databricks")

library(dplyr)

List the tables in the `mydb` database:

tbl_change_db(sc, "mydb") # switch the active database from "default" to "mydb"
src_tbls(sc) # list the tables visible in the now-active database

Read a table from the `mydb` database:

spark_df <- spark_read_table(sc, "mytable") # read as Spark DataFrame (remote tbl, not yet local)
r_df <- collect(spark_df) # materialize the full table into local R memory — can be large

Save an R data frame as a Spark table:

# Switch the active database back to "default" — the target for the write.
tbl_change_db(sc, "default")
# Copy the local R data frame up to Spark as a (temporary) Spark DataFrame.
# Renamed the handle from `t` to `spark_tbl`: `t` masks base::t() (transpose).
spark_tbl <- copy_to(sc,
                     r_df,
                     "mytable",
                     overwrite = TRUE)
# Persist the Spark DataFrame as a table, replacing any existing "mytable".
spark_write_table(spark_tbl, "mytable", mode = "overwrite")
# Confirm the table now appears in the active database.
src_tbls(sc)
Have a question? Contact me.