
Python Spark processing auth.log file

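The short PySpark script below reads an sshd auth.log, keeps the failed login attempts against root, pulls the source IP out of each line, and counts attempts per address with a word-count style reduceByKey.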

"""
auth log analysis with spark
SimpleApp.py
"""
import sys
from operator import add
from time import strftime
from pyspark import SparkContext

outFile = "counts" + strftime("%Y-%m-%d")
logFile = "auth.log"
destination = "local"
appName = "Simple App"

sc = SparkContext(destination, appName)
logData = sc.textFile(logFile).cache()
failedData = logData.filter(lambda x: 'Failed' in x)
rootData = failedData.filter(lambda s: 'root' in s)

def splitville(line):
 return line.split("from ")[1].split()[0]

ipAddresses = rootData.map(splitville)
counts = ipAddresses.map(lambda x: (x, 1)).reduceByKey(add)
output = counts.collect()
for (word, count) in output:
 print "%s: %i" % (word, count)
counts.saveAsTextFile(outFile)
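As a quick local sanity check of the splitville parsing (no Spark needed), the extraction can be run on a single sshd-style line. The sample line and address below are made up for illustration:

# Stand-alone check of the "from <ip>" extraction; the sample line is hypothetical.
sample = ("Apr 22 20:55:21 host sshd[1234]: "
          "Failed password for root from 203.0.113.7 port 4242 ssh2")

def splitville(line):
    # the token immediately after "from " is the client address
    return line.split("from ")[1].split()[0]

print(splitville(sample))  # -> 203.0.113.7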




https://github.com/bigsnarfdude/PythonSystemAdminTools/blob/master/SimpleApp.py
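One caveat on the script above: any 'Failed ... root' line that does not contain "from " makes splitville raise an IndexError. A hedged alternative (a sketch, not part of the original script) is to pull the address with a regex inside flatMap so non-matching lines are simply dropped:

import re

IP_RE = re.compile(r"from (\d{1,3}(?:\.\d{1,3}){3})")

def extract_ip(line):
    # return a 0- or 1-element list so flatMap silently drops lines without a match
    m = IP_RE.search(line)
    return [m.group(1)] if m else []

ipAddresses = rootData.flatMap(extract_ip)  # drop-in replacement for rootData.map(splitville)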
