Checkpoint — spark-shell transcript demonstrating RDD caching and checkpointing (set the checkpoint directory with `sc.setCheckpointDir`, call `cache()` then `checkpoint()` before the first action so the checkpoint reuses the cached result instead of recomputing the lineage):

scala> sc.textFile("hdfs://hadoop21:9000/wc/").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_)
res10: org.apache.spark.rdd.RDD[(String, Int)] = ShuffledRDD[29] at reduceByKey at <console>:25

scala> sc.setCheckpointDir("hdfs://hadoop21:9000/ck0001")

scala> val res = sc.textFile("hdfs://hadoop21:9000/wc/").flatMap(_.split(" ")).map((_,1)).reduceByKey(_+_)
res: org.apache.spark.rdd.RDD[(String, Int)] = ShuffledRDD[34] at reduceByKey at <console>:24

scala> res.cache()
res12: res.type = ShuffledRDD[34] at reduceByKey at <console>:24

scala> res.checkpoint()

scala> res.collect
res14: Array[(String, Int)] = Array((tom,11), (hello,22), (jerry,6), (kitty,1), (hanmeimei,2), (lilei,2))


Recommended further reading