This solved the problem.
Note:
- Spark version: 2.0
- Hadoop version: 2.7
# install.packages("devtools")
# devtools::install_github("rstudio/sparklyr")
library(sparklyr)
library(dplyr)
# Build the connection config; the hadoop-aws package provides the s3a:// filesystem
conf <- spark_config()
conf$sparklyr.defaultPackages <- "org.apache.hadoop:hadoop-aws:2.7.3"
# conf$spark.executor.memory <- "4g"   # optionally raise executor memory
sc <- spark_connect(master = "local", config = conf)
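To confirm the versions noted above against the live connection, sparklyr can report them directly:

spark_version(sc)   # should report 2.0.x per the note above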
# Get the Spark context from the connection
ctx <- sparklyr::spark_context(sc)

# Wrap it in a JavaSparkContext so the Hadoop configuration can be reached
jsc <- invoke_static(
  sc,
  "org.apache.spark.api.java.JavaSparkContext",
  "fromSparkContext",
  ctx
)
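As an aside, hadoopConfiguration is also a method on the Scala SparkContext itself, so the JavaSparkContext wrapper can usually be skipped; a minimal sketch, assuming this works on your sparklyr build:

# Alternative (assumption): read the Hadoop configuration straight off the SparkContext
hconf <- ctx %>% invoke("hadoopConfiguration")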
# Set the S3 credentials on the Hadoop configuration
hconf <- jsc %>% invoke("hadoopConfiguration")
hconf %>% invoke("set", "fs.s3a.access.key", "xxxx")
hconf %>% invoke("set", "fs.s3a.secret.key", "xxxx")
# Check that the Spark connection is still open
sparklyr::spark_connection_is_open(sc = sc)
small_file <- "s3a://temp-sg/MVC"
temp <- spark_read_csv(sc, name = "MVC", path = small_file, infer_schema = TRUE)
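Before disconnecting, it is worth sanity-checking the load; a quick sketch using standard dplyr/sparklyr verbs:

temp %>% head(5) %>% collect()   # pull the first rows back into R
sdf_nrow(temp)                   # row count computed on the Spark side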
spark_disconnect(sc)