BigSnarf blog

Infosec FTW

python hyperloglog and webscale counters

from mmhash import mmhash
from math import log
from zlib import compress
from base64 import b64encode
class HyperLogLog:
 def __init__(self, log2m):
 self.log2m = log2m
 self.m = 1 << log2m = [0]*self.m
 self.alphaMM = (0.7213 / (1 + 1.079 / self.m)) * self.m * self.def offer(self, o):
 x = mmhash(str(o), 0)
 a, b = 32-self.log2m, self.log2m
 i = x >> a
 v = self._bitscan(x << b, a)[i] = max([i], v)
def count(self):
 estimate = self.alphaMM / sum([2**-v for v in])
 if estimate <= 2.5 * self.m:
 zeros = float(
 return round(-self.m * log(zeros / self.m))
 return round(estimate)

 def _bitscan(self, x, m):
 v = 1
 while v<=m and not x&0x80000000:
 return v

 def datastr(self):
 return b64encode(compress(str.join('', map(chr,, 9))

Leave a Reply

Fill in your details below or click an icon to log in: Logo

You are commenting using your account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s

%d bloggers like this: