BigSnarf blog

Infosec FTW

Monthly Archives: May 2016

Good Sources of Labelled Security Attack Data – The ongoing challenge

Screen Shot 2016-05-09 at 8.15.43 AM

We all need to watch for compromising account credentials.

  • password brute forcing/password guessing
  • password reset
  • phishing/whaling
  • credential leaks/harvesting
  • drive by compromise

How do you watch this stuff in the cloud? Workstations? Users? Account breaches increase risk and give a “bad guy” anywhere, anytime access.

Also, regarding this interesting slide above from the RSA conference, I would add:

  • Crawl – Public Data
  • Walk – HoneyPot Data
  • Jog – Red Team Data
  • Run – Shared Normalized Breach Data and Attack Methodology for PP rules (IMHO)

Retraining Inception v3 Tensorflow to recognize new task

Tensorflow Facial Key Points

Screen Shot 2016-05-06 at 12.02.36 AM

# Column names of the facial-keypoint coordinates in the Kaggle Facial
# Keypoints Detection dataset: 15 landmarks, one _x and one _y column each.
targets = ["left_eye_center_x", "left_eye_center_y", "right_eye_center_x", "right_eye_center_y",
           "left_eye_inner_corner_x", "left_eye_inner_corner_y", "left_eye_outer_corner_x",
           "left_eye_outer_corner_y", "right_eye_inner_corner_x", "right_eye_inner_corner_y",
           "right_eye_outer_corner_x", "right_eye_outer_corner_y", "left_eyebrow_inner_end_x",
           "left_eyebrow_inner_end_y", "left_eyebrow_outer_end_x", "left_eyebrow_outer_end_y",
           "right_eyebrow_inner_end_x", "right_eyebrow_inner_end_y", "right_eyebrow_outer_end_x",
           "right_eyebrow_outer_end_y", "nose_tip_x", "nose_tip_y", "mouth_left_corner_x",
           "mouth_left_corner_y", "mouth_right_corner_x", "mouth_right_corner_y",
           "mouth_center_top_lip_x", "mouth_center_top_lip_y", "mouth_center_bottom_lip_x",
           # NOTE(review): the final entry and the closing bracket were truncated
           # in this copy of the code; restored from the dataset's column list.
           "mouth_center_bottom_lip_y"]
# Convert to an ndarray so lists of integer indices can select column subsets
# (used as targets[target] in generate_dataset below).
targets = np.array(targets)
def generate_dataset(target, ratio_validation_data):
    """Build train/validation/test arrays for a subset of facial keypoints.

    NOTE(review): indentation, the docstring quotes, the dropna call, the
    minus signs in the rescaling lambdas, and the tail of the function were
    all lost in this copy; they have been reconstructed below and should be
    checked against the original gist.

    Parameters
    ----------
    target : list of ints
        The indices of the ``targets`` variable to be trained on.
    ratio_validation_data : float
        Fraction of the data to use as a validation set.

    Returns
    -------
    tuple
        (X_train, X_validate, y_train, y_validate, test) — presumed; the
        original return statement was truncated.
    """
    # Read in raw data
    training = pd.read_csv("data/training.csv")
    test = pd.read_csv("data/test.csv")
    # Keep only the relevant columns
    drop_columns = set(training.columns.values).difference({"Image"}.union(targets[target]))
    training.drop(drop_columns, inplace=True, axis=1)
    # Drop rows with missing keypoint labels (the "Drop NaN" step — the
    # actual call was lost in this copy; restored as dropna).
    training.dropna(inplace=True)
    # Generate 96x96 images from the space-separated string of pixel intensities
    training.Image = training.Image.apply(lambda x: np.array(x.split(" ")).astype(float).reshape((96, 96)))
    test.Image = test.Image.apply(lambda x: np.array(x.split(" ")).astype(float).reshape((96, 96)))
    # Rescale images to interval [0, 1] (minus signs were stripped by the
    # web copy; restored).
    training.Image = training.Image.apply(lambda x: (x - x.min()) / (x.max() - x.min()))
    test.Image = test.Image.apply(lambda x: (x - x.min()) / (x.max() - x.min()))
    # Data augmentation — ``augment`` is defined elsewhere in the original file.
    training.Image = training.Image.apply(lambda x: augment(x))
    test.Image = test.Image.apply(lambda x: augment(x))
    # Final resizing for stacking: add a leading channel axis
    training.Image = training.Image.apply(lambda x: x[np.newaxis, :])
    test.Image = test.Image.apply(lambda x: x[np.newaxis, :])
    # Split into training and validation sets
    X_train, X_validate, y_train, y_validate = train_test_split(
        training.Image,
        training.drop(["Image"], axis=1).values,
        test_size=ratio_validation_data)
    return (X_train, X_validate, y_train, y_validate, test)

Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

Anomaly Detection Python T-Digest

import os
import pickle
import codecs
import base64
import sqlite3
from collections import namedtuple
import random
import tdigest
# Simple example that persists data under an ID using
# pickle -> base64 -> string, storing external objects by reference
# in a database.

# Lightweight record type mirroring one row of the ``pages`` table.
TDigestRecord = namedtuple("TDigestRecord", ["key", "data"])
class DBPickler(pickle.Pickler):
    """Pickler that externalizes TDigestRecord instances as persistent IDs."""

    def persistent_id(self, obj):
        # Anything that is not a TDigestRecord is pickled the usual way;
        # returning None tells the pickle machinery to serialize it inline.
        if not isinstance(obj, TDigestRecord):
            return None
        # For TDigestRecord we emit a ("TDigestRecord", key) tag instead of
        # the object itself; the matching unpickler resolves the tag back
        # into a database row.
        return ("TDigestRecord", obj.key)
class DBUnpickler(pickle.Unpickler):
    """Unpickler that resolves persistent IDs back into database records."""

    def __init__(self, file, connection):
        # Bug fix: the base Unpickler must be initialized with the stream,
        # otherwise load() fails before persistent_load is ever consulted.
        super().__init__(file)
        self.connection = connection

    def persistent_load(self, pid):
        # This method is invoked whenever a persistent ID is encountered.
        # Here, pid is the ("TDigestRecord", key) tuple returned by
        # DBPickler.persistent_id.
        cursor = self.connection.cursor()
        type_tag, key_id = pid
        if type_tag == "TDigestRecord":
            # Fetch the referenced record from the database and return it.
            cursor.execute("SELECT * FROM pages WHERE key=?", (str(key_id),))
            key, data = cursor.fetchone()
            return TDigestRecord(key, data)
        # Always raise if the correct object cannot be returned; otherwise
        # the unpickler would treat None as the referenced object.
        raise pickle.UnpicklingError("unsupported persistent object")
def pickle_base64_stringify(obj):
    """Serialize *obj* with pickle and return it as a base64 text string."""
    raw = pickle.dumps(obj)
    return codecs.encode(raw, "base64").decode()
def string_base64_pickle(obj):
    """Decode a base64 text string and unpickle the resulting bytes.

    NOTE: pickle.loads must only ever be fed trusted data.
    """
    raw = codecs.decode(obj.encode(), "base64")
    return pickle.loads(raw)
def main():
    """Demonstrate persisting T-Digest objects by reference through sqlite.

    NOTE(review): several lines of this function were truncated in this
    copy (the digest updates, the appends to ``datas``, the DBPickler dump,
    the stream rewind, and the ``restored`` assignment); they have been
    reconstructed below and should be checked against the original gist.
    """
    import io
    import pprint
    # Initialize and populate our database.
    conn = sqlite3.connect(":memory:")
    cursor = conn.cursor()
    cursor.execute("CREATE TABLE pages(key INTEGER PRIMARY KEY, data TEXT)")
    datas = []
    # Build 29 digests, each fed 20 random samples, and serialize each one
    # to a base64 string for storage.
    for i in range(1, 30):
        temp = tdigest.TDigest()
        for x in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]:
            temp_value = 1000 * x * random.random()
            temp.update(temp_value)
        datas.append(pickle_base64_stringify(temp))
    for data in datas:
        # Strip the newlines codecs' base64 encoder inserts so each row is
        # a single-line TEXT value.
        cursor.execute("INSERT INTO pages VALUES(NULL, ?)", (data.replace("\n", ""),))
    # Fetch the records to be pickled.
    cursor.execute("SELECT * FROM pages")
    pages = [TDigestRecord(key, data) for key, data in cursor]
    # Save the records using our custom DBPickler.
    file = io.BytesIO()
    DBPickler(file).dump(pages)
    print("Pickled records:")
    pprint.pprint(pages)
    # Update a record, just for good measure.
    cursor.execute("UPDATE pages SET data='learn tdigest' WHERE key=1")
    # Load the records from the pickle data stream.
    file.seek(0)
    pages = DBUnpickler(file, conn).load()
    print("Unpickled records:")
    pprint.pprint(pages)
    # Restore each stored digest back from its base64 string.
    for tdigest_obj in pages:
        restored = string_base64_pickle(tdigest_obj.data)
        print((tdigest_obj.key, restored))

if __name__ == '__main__':
    main()

view raw


hosted with ❤ by GitHub

Click to access hotcloud14-vallis.pdf

Screen Shot 2016-05-01 at 12.16.13 AM

Parameterized anomaly detection settings


Event correlation is a technique for making sense of a large number of events and pinpointing the few events that are really important in that mass of information. This is accomplished by looking for and analyzing relationships between events.