//scala wordcount example
import scala.io.Source
val lines = Source.fromFile("README.md").getLines.toArray
val emptyCounts = Map[String,Int]().withDefaultValue(0)
words.length
val counts = words.foldLeft(emptyCounts)({(currentCounts: Map[String,Int], word: String) => currentCounts.updated(word, currentCounts(word) + 1)})
//algebird hyperloglog
import HyperLogLog._
val hll = new HyperLogLogMonoid(4)
val data = List(1, 1, 2, 2, 3, 3, 4, 4, 5, 5)
val seqHll = data.map { hll(_) }
val sumHll = hll.sum(seqHll)
val approxSizeOf = hll.sizeOf(sumHll)
val actualSize = data.toSet.size
val estimate = approxSizeOf.estimate
//algebird bloomfilter
import com.twitter.algebird._
val NUM_HASHES = 6
val WIDTH = 32
val SEED = 1
val bfMonoid = new BloomFilterMonoid(NUM_HASHES, WIDTH, SEED)
val bf = bfMonoid.create("1", "2", "3", "4", "100")
val approxBool = bf.contains("1")
val res = approxBool.isTrue
//algebird countMinSketch
import com.twitter.algebird._
val DELTA = 1E-10
val EPS = 0.001
val SEED = 1
val CMS_MONOID = new CountMinSketchMonoid(EPS, DELTA, SEED)
val data = List(1L, 1L, 3L, 4L, 5L)
val cms = CMS_MONOID.create(data)
cms.totalCount
cms.frequency(1L).estimate
cms.frequency(2L).estimate
cms.frequency(3L).estimate
val data = List("1", "2", "3", "4", "5")
val cms = CMS_MONOID.create(data)
//sketch map
import com.twitter.algebird._
val DELTA = 1E-8
val EPS = 0.001
val SEED = 1
val HEAVY_HITTERS_COUNT = 10
implicit def string2Bytes(i : String) = i.toCharArray.map(_.toByte)
val PARAMS = SketchMapParams[String](SEED, EPS, DELTA, HEAVY_HITTERS_COUNT)
val MONOID = SketchMap.monoid[String, Long](PARAMS)
val data = List( ("1", 1L), ("3", 2L), ("4", 1L), ("5", 1L) )
val sm = MONOID.create(data)
sm.totalValue
MONOID.frequency(sm, "1")
MONOID.frequency(sm, "2")
MONOID.frequency(sm, "3")
https://github.com/twitter/algebird/wiki/Algebird-Examples-with-REPL