BigSnarf blog

Infosec FTW

Spark Streaming Word Count from Network Socket in Scala

Screen Shot 2014-04-21 at 9.14.30 PM
package com.wordpress.bigsnarf.spark.streaming.examples

import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.StreamingContext._
import org.apache.spark.storage.StorageLevel

/**
 * Counts words in text encoded with UTF8 received from the network every second.
 *
 * Expects three command-line arguments:
 * {{{
 * NetworkWordCount <master> <hostname> <port>
 * }}}
 * The first is passed to the `StreamingContext` as the master; the second and
 * third are the host and TCP port of the text source to connect to.
 */
object NetworkWordCount {

  /** Usage text printed to stderr whenever the arguments are unusable. */
  private val Usage =
    "Usage: NetworkWordCount <master> <hostname> <port>\n" +
    "In local mode, <master> should be 'local[n]' with n > 1"

  /**
   * Program entry point: validates the arguments, builds the streaming
   * pipeline, and blocks until the streaming context terminates.
   *
   * Exits the JVM with status 1 if fewer than three arguments are given or
   * if the port is not an integer in the range 1-65535.
   *
   * @param args master, hostname and port, in that order
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      System.err.println(Usage)
      sys.exit(1)
    }

    // Parse the port up front so a bad value produces a readable message
    // instead of an uncaught NumberFormatException.
    val port = scala.util.Try(args(2).toInt)
      .filter(p => p > 0 && p <= 65535)
      .getOrElse {
        System.err.println(
          s"Invalid port '${args(2)}': expected an integer between 1 and 65535\n$Usage")
        sys.exit(1)
      }

    // NOTE(review): StreamingExamples is neither defined nor imported in this
    // file; presumably it is the logging helper from Spark's examples package —
    // confirm it is available under this package.
    StreamingExamples.setStreamingLogLevels()

    // Streaming context with a one-second batch interval.
    val ssc = new StreamingContext(
      args(0),
      "NetworkWordCount",
      Seconds(1),
      System.getenv("SPARK_HOME"),
      StreamingContext.jarOfClass(this.getClass))

    // Stream of text lines read from the socket at <hostname>:<port>.
    val lines = ssc.socketTextStream(args(1), port, StorageLevel.MEMORY_ONLY_SER)

    // Split each line on single spaces; blank lines and runs of spaces yield
    // empty tokens, which are dropped so they are not counted as a word.
    val words = lines.flatMap(_.split(" ")).filter(_.nonEmpty)

    // Pair every word with 1 and sum the pairs per word within each batch.
    val wordCounts = words.map(word => (word, 1)).reduceByKey(_ + _)

    // Print the counts for each batch, then run until terminated.
    wordCounts.print()
    ssc.start()
    ssc.awaitTermination()
  }
}



http://spark.apache.org/docs/latest/streaming-programming-guide.html
https://github.com/apache/spark/tree/master/examples/src/main/scala/org/apache/spark/streaming/examples
https://github.com/twitter/algebird/wiki/Algebird-Examples-with-REPL

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. ( Log Out / Change )

Twitter picture

You are commenting using your Twitter account. ( Log Out / Change )

Facebook photo

You are commenting using your Facebook account. ( Log Out / Change )

Google+ photo

You are commenting using your Google+ account. ( Log Out / Change )

Connecting to %s

%d bloggers like this: