BigSnarf blog

Infosec FTW

Monthly Archives: November 2015

Malware Detection with Algebird LSH

http://arxiv.org/abs/1606.04662

Detect polymorphic malware variants by extracting features from static/dynamic analysis and using a Locality-Sensitive Hashing (LSH) data structure to compare samples. Enrich? Geo? Host?

A couple of related papers:

http://link.springer.com/chapter/10.1007/978-3-319-23461-8_6

http://link.springer.com/chapter/10.1007/978-3-319-23461-8_8

Brute-force comparison within each LSH bucket: return the distinct pairs whose similarity is at or above the target threshold.

.flatMap { case (_, malwareIdSet) =>
  for {
    (malwareId1, sig1) <- malwareIdSet
    (malwareId2, sig2) <- malwareIdSet
    sim = minHasher.similarity(sig1, sig2)
    if malwareId1 != malwareId2 && sim >= targetThreshold
  } yield (malwareId1, malwareId2)
}
.distinct
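
For context, the bucketed `malwareIdSet` above can be produced with Algebird's `MinHasher32`: build one MinHash signature per sample from its extracted features, then group samples by their LSH band buckets so that only candidates sharing a bucket are compared. A minimal sketch on plain Scala collections (the sample IDs, feature strings and the 1024-byte signature size are made-up illustration values, not from the original job):

import com.twitter.algebird.{MinHasher32, MinHashSignature}

// Hypothetical input: malware sample id -> features from static/dynamic analysis
val samples: Map[String, Set[String]] = Map(
  "sample-a" -> Set("mutex:Foo", "api:CreateRemoteThread", "section:.xyz"),
  "sample-b" -> Set("mutex:Foo", "api:CreateRemoteThread", "section:.abc"),
  "sample-c" -> Set("api:RegSetValue")
)

val targetThreshold = 0.8
// Hash and band counts are picked so that sharing a bucket roughly corresponds
// to Jaccard similarity >= targetThreshold
val minHasher = MinHasher32(targetThreshold, 1 << 10)

// One MinHash signature per sample: init each feature, combine with the monoid plus
val signatures: Map[String, MinHashSignature] =
  samples.map { case (id, features) =>
    id -> features.map(f => minHasher.init(f)).reduce((a, b) => minHasher.plus(a, b))
  }

// LSH step: emit (bucket, (id, signature)) and group by bucket, so the flatMap
// above only compares samples that landed in the same bucket
val bucketed: Map[Long, Seq[(String, MinHashSignature)]] =
  signatures.toSeq
    .flatMap { case (id, sig) => minHasher.buckets(sig).map(b => b -> (id, sig)) }
    .groupBy { case (bucket, _) => bucket }
    .map { case (bucket, pairs) => bucket -> pairs.map(_._2) }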

T-Digest Algebird

https://github.com/twitter/algebird/compare/develop...erikerlandson:feature/tdigest

https://github.com/CamDavidsonPilon/tdigest

http://koff.io/posts/using-t-digest/

https://github.com/fluxcapacitor/pipeline/tree/master/myapps/spark/streaming/src/main/scala/com/advancedspark/streaming/rating/approx

Example of T-Digest plus method with Algebird Semigroup


/**
 * Example of a T-Digest plus method with an Algebird Semigroup
 */
import com.tdunning.math.stats.TDigest
import com.twitter.algebird.Semigroup
import io.koff.t_digest._

case object TDigestSemigroup extends Semigroup[TDigest] {
  override def plus(l: TDigest, r: TDigest): TDigest = {
    val td = TDigest.createDigest(math.max(l.compression(), r.compression()))
    td.add(l)
    td.add(r)
    td
  }

  override def sumOption(iter: TraversableOnce[TDigest]): Option[TDigest] =
    iter.foldLeft(None: Option[TDigest]) {
      case (None, el) =>
        val td = TDigest.createDigest(el.compression())
        td.add(el)
        Some(td)
      case (f @ Some(acc), el) =>
        acc.add(el)
        f
    }
}

val oneSecond = 1000
val twoMinutes = 2 * 60 * 1000
val tenMinutes = 10 * 60 * 1000
val twoHours = 2 * 60 * 60 * 1000
val mainValues = 10000000
val badValues = 10000

// generate 10,000,000 pseudo-random values for normal user session durations
val goodData = Generator.generate(count = mainValues, from = oneSecond, to = twoMinutes)
// generate 10,000 (0.1%) pseudo-random values for invalid user session durations
val badData = Generator.generate(count = badValues, from = tenMinutes, to = twoHours)
val allData = goodData ++ badData

val goodDigest = TDigest.createAvlTreeDigest(100)
val badDigest = TDigest.createAvlTreeDigest(100)
val allDigest = TDigest.createAvlTreeDigest(100)
//val goodDigest = TDigest.createTreeDigest(100)
//val badDigest = TDigest.createTreeDigest(100)
//val allDigest = TDigest.createTreeDigest(100)

// add good data values to the good digest
goodData.foreach(value => goodDigest.add(value))
// add bad data values to the bad digest
badData.foreach(value => badDigest.add(value))
// add all data values to the combined digest
allData.foreach(value => allDigest.add(value))

// this threshold assumes that ~0.1% of the data is anomalous
val thresholdAllDigest = allDigest.quantile(0.999d).toInt
val tds = TDigestSemigroup
val plusDigest = tds.plus(goodDigest, badDigest)
val thresholdPlusDigest = plusDigest.quantile(0.999d).toInt
val thresholdGoodDigest = goodDigest.quantile(0.999d).toInt
val thresholdBadDigest = badDigest.quantile(0.999d).toInt
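
As a follow-up (not part of the gist above), the Semigroup is what makes the digests mergeable: digests built independently, e.g. per node or per partition, can be summed into one distribution before the quantile threshold is taken, and anomalies are whatever falls above it. A small sketch reusing the values above, assuming `Generator.generate` returns a numeric sequence:

// Merge independently built digests with the Semigroup before thresholding
val merged = TDigestSemigroup.sumOption(Seq(goodDigest, badDigest)).get
val anomalyThreshold = merged.quantile(0.999d)

// Flag any session duration above the ~99.9th percentile of the merged digest
def isAnomalous(durationMs: Double): Boolean = durationMs > anomalyThreshold

val flaggedCount = allData.count(v => isAnomalous(v.toDouble))
println(s"flagged $flaggedCount of ${allData.size} sessions as anomalous")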


http://erikerlandson.github.io/blog/2015/09/26/a-library-of-binary-tree-algorithms-as-mixable-scala-traits/

https://github.com/tdunning/t-digest/tree/master/src/main/java/com/tdunning/math/stats

 

https://github.com/HdrHistogram/HdrHistogram

http://www.ebaytechblog.com/2015/08/19/statistical-anomaly-detection/

 

Click to access histo.pdf

Redis Analytics

LinkedHashMap

I LOVE LUCY Chocolate Factory

Scala Redis LinkedHashMap implementation


/*
 * Implements a queue/cache that holds key/value pairs and flushes a key/value
 * once that key has been updated flush_interval (1000) times.
 * Mimics LinkedHashMap behaviour with a per-key counter (a Redis sorted set)
 * that triggers the flush on the Nth update.
 *
 * Example input:
 *   val items = Map("foo" -> "bar", "yin" -> "yang", "One" -> "1", "some_key" -> "some_value")
 */
object RedisLinkedHashMap {
  import com.redis._

  val r = new RedisClient("localhost", 6379)

  val capacity = 5000       // keys
  val flush_interval = 1000 // flush on every 1000th update

  // If the JVM gets a shutdown signal, spawn a thread to flush all keys
  Runtime.getRuntime.addShutdownHook(new Thread { override def run(): Unit = flushAll() })

  def uuid: String = java.util.UUID.randomUUID.toString

  // Write the value into the hash and bump the per-key update counter
  private def update(key: String, value: String): Unit = {
    r.hset("map_collection_queue", key, value)
    val count = r.zincrby("flush_counter", 1, key)
    println(s"$key -> $value (updates: ${count.getOrElse(0.0)})")
  }

  def getOrUpdate(key: String, value: String): Unit = {
    val existing = r.hget("map_collection_queue", key)
    if (existing.isEmpty) {
      println("new entry")
      update(key, value)
    } else if (r.zrank("flush_counter", key).isEmpty) {
      // key is in the hash but has no counter yet
      update(key, value)
    } else {
      val zValue = r.zscore("flush_counter", key).get
      if (zValue >= flush_interval) {
        println("flushing")
        flush(key)
        update(key, value)
      } else {
        update(key, value)
      }
    }
  }

  // All queued key/value pairs, ordered by their flush counter
  def getMap: Map[String, String] = {
    val orderedKeys = r.zrange("flush_counter", 0, -1)
    orderedKeys.get.map { key =>
      key -> r.hget("map_collection_queue", key).get
    }.toMap
  }

  // Per-key update counts from the flush_counter sorted set
  def getMapCounter: Map[String, Double] = {
    val orderedKeys = r.zrange("flush_counter", 0, -1)
    orderedKeys.get.map { zKey =>
      zKey -> r.zscore("flush_counter", zKey).getOrElse(0.0)
    }.toMap
  }

  // Put items into the three Redis data structures in one pass
  def bulk(itemz: Map[String, String]): Unit = {
    itemz.foreach { case (key, value) =>
      val count = r.incr("hash_counter").get.toDouble // position of the next element
      r.hset("map_collection_queue", key, value)      // generic unordered map
      r.zadd("hash_order", count, key)                // insertion order
    }
  }

  // Remove a key from the queue and its counter
  def flush(key: String): Unit = {
    r.hdel("map_collection_queue", key)
    r.zrem("flush_counter", key)
    println("flushing " + key)
  }

  def flushAll(): Unit = {} // TODO: flush every queued key on shutdown
}
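
A quick usage sketch (not from the gist) against a local Redis, exercising the object above with the sample items map from its header comment; `RedisLinkedHashMapExample` and the extra keys are made up for illustration:

object RedisLinkedHashMapExample extends App {
  val items = Map("foo" -> "bar", "yin" -> "yang", "One" -> "1", "some_key" -> "some_value")

  RedisLinkedHashMap.bulk(items)                        // seed the hash and insertion-order set
  RedisLinkedHashMap.getOrUpdate("foo", "bar2")         // existing key: value rewritten, counter bumped
  RedisLinkedHashMap.getOrUpdate("new_key", "new_val")  // missing key: inserted as a new entry

  println(RedisLinkedHashMap.getMap)        // key -> value for keys tracked in flush_counter
  println(RedisLinkedHashMap.getMapCounter) // key -> update count from flush_counter
}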


 
