1. SparkUtils utility class
import org.apache.spark.{SparkConf, SparkContext}

object SparkUtils {
  /** Default master URL */
  val DEFAULT_MASTER = "local[*]"

  /** Get a SparkContext using the default master (local[*]) */
  def getSparkContext(appName: String): SparkContext =
    getSparkContext(appName, DEFAULT_MASTER)

  def getSparkContext(appName: String, master: String): SparkContext =
    new SparkContext(new SparkConf().setAppName(appName).setMaster(master))

  /** Release the SparkContext */
  def close(sc: SparkContext): Unit = if (sc != null) sc.stop()
}
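For reference, a minimal usage sketch of the intended lifecycle: acquire a context, run a job, release it. The object name WordCountDemo and the sample data are illustrative, not part of the original utilities.

  import org.apache.spark.SparkContext

  // Hypothetical driver built on SparkUtils
  object WordCountDemo {
    def main(args: Array[String]): Unit = {
      val sc: SparkContext = SparkUtils.getSparkContext("WordCountDemo")
      sc.parallelize(Seq("hello spark", "hello scala"))
        .flatMap(_.split("\\s+"))   // split lines into words
        .map(word => (word, 1))     // pair each word with a count of 1
        .reduceByKey(_ + _)         // sum counts per word
        .foreach(println)
      SparkUtils.close(sc)          // always release the context
    }
  }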
2. Logging utility
import org.apache.log4j.{Level, Logger}

trait LoggerTrait {
  // Raise noisy framework loggers to WARN so application output stands out
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  Logger.getLogger("org.apache.hadoop").setLevel(Level.WARN)
  Logger.getLogger("org.spark_project").setLevel(Level.WARN)
}
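A note on why this works: a trait's body executes when the object that mixes it in is initialized, so the setLevel calls run before any code in main. A minimal sketch to verify this (InitOrderDemo is a hypothetical name):

  import org.apache.log4j.Logger

  object InitOrderDemo extends LoggerTrait {
    def main(args: Array[String]): Unit = {
      // The trait body has already run by the time main starts
      println(Logger.getLogger("org.apache.spark").getLevel) // prints WARN
    }
  }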
3. Spark operator: flatMap
import cn.qphone.spark.core.day2.{LoggerTrait, SparkUtils}
import org.apache.spark.rdd.RDD

object Deom2_flatMap extends LoggerTrait {
  def main(args: Array[String]): Unit = {
    // 1. Get the SparkContext
    val sc = SparkUtils.getSparkContext("Deom2_flatMap")
    // 2. Source data
    val list: List[String] = List("i am a big boy", "you are a bag girl")
    // 3. Load the data into an RDD with a single partition
    val listRDD: RDD[String] = sc.parallelize(list, 1)
    // 4. Apply flatMap: split each line on whitespace and flatten the results
    val flatMapRDD: RDD[String] = listRDD.flatMap(line => line.split("\\s+"))
    // 5. Print each word
    flatMapRDD.foreach(println)
    // 6. Release resources
    SparkUtils.close(sc)
  }
}
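In local mode with a single partition, the demo prints the ten words in order: i, am, a, big, boy, you, are, a, bag, girl. To see what flatMap adds over map, here is a sketch contrasting the two on the same listRDD (assumed to run inside the main above): map yields one output element per input line, so the result is an RDD[Array[String]] with two arrays, while flatMap flattens those arrays into a single RDD[String] of ten words.

  // Sketch: map vs flatMap on the same two-line input
  val mapped: RDD[Array[String]] = listRDD.map(line => line.split("\\s+"))
  val flattened: RDD[String] = listRDD.flatMap(line => line.split("\\s+"))
  println(mapped.count())    // 2  -- one Array[String] per input line
  println(flattened.count()) // 10 -- individual words after flattening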