BigSnarf blog

Infosec FTW

Python Spark processing auth.log file

Screen Shot 2014-04-22 at 8.55.21 PM

auth log analysis with spark
import sys
from operator import add
from time import strftime
from pyspark import SparkContext

# Run configuration: the log to read, the Spark master to run against,
# and the application name passed to the SparkContext.
logFile = "auth.log"
destination = "local"
appName = "Simple App"

# Date-stamped name built from today's date. NOTE(review): nothing in the
# visible script reads outFile -- confirm whether results were meant to be
# written there.
outFile = "counts" + strftime("%Y-%m-%d")

# Create the SparkContext from the master and application name above.
sc = SparkContext(destination, appName)

# Load the log file as an RDD of text lines and mark it for caching.
logData = sc.textFile(logFile).cache()

# Keep only lines containing 'Failed', then only those also containing 'root'.
failedData = logData.filter(lambda entry: 'Failed' in entry)
rootData = failedData.filter(lambda entry: 'root' in entry)

def splitville(line):
    """Return the first whitespace-delimited token that follows "from ".

    The line is split on every occurrence of "from "; the segment right
    after the first occurrence is then split on whitespace and its first
    token is returned.

    Raises IndexError when the line has no "from " or when nothing but
    whitespace follows it.
    """
    segments = line.split("from ")
    after_marker = segments[1]
    tokens = after_marker.split()
    return tokens[0]

# NOTE(review): the two assignments below were truncated in the published
# listing ("ipAddresses =" / "counts = x: (x, 1)).reduceByKey(add)").
# They are reconstructed from the surviving fragments and from the names
# that are otherwise unused (rootData, splitville) -- confirm against the
# original script.

# Map each 'Failed' + 'root' line to the token splitville extracts from it.
ipAddresses = rootData.map(splitville)

# Pair every extracted token with 1, then sum the ones per distinct token.
counts = ipAddresses.map(lambda x: (x, 1)).reduceByKey(add)

# Collect the (token, count) pairs to the driver and print one per line.
output = counts.collect()
for (word, count) in output:
    # Parenthesised single-argument form prints the same text on Python 2 and 3.
    print("%s: %i" % (word, count))

Leave a Reply

Fill in your details below or click an icon to log in: Logo

You are commenting using your WordPress.com account. ( Log Out / Change )

Twitter picture

You are commenting using your Twitter account. ( Log Out / Change )

Facebook photo

You are commenting using your Facebook account. ( Log Out / Change )

Connecting to %s

%d bloggers like this: