Update dependencies

This commit is contained in:
Manuel de Brito Fontes 2017-10-06 17:33:32 -03:00
parent bf5616c65b
commit d6d374b28d
13962 changed files with 48226 additions and 3618880 deletions

View file

@ -1,26 +0,0 @@
package histogram
import (
"math/rand"
"testing"
)
func BenchmarkInsert10Bins(b *testing.B) {
b.StopTimer()
h := New(10)
b.StartTimer()
for i := 0; i < b.N; i++ {
f := rand.ExpFloat64()
h.Insert(f)
}
}
func BenchmarkInsert100Bins(b *testing.B) {
b.StopTimer()
h := New(100)
b.StartTimer()
for i := 0; i < b.N; i++ {
f := rand.ExpFloat64()
h.Insert(f)
}
}

View file

@ -1,108 +0,0 @@
// Package histogram provides a Go implementation of BigML's histogram package
// for Clojure/Java. It is currently experimental.
package histogram
import (
"container/heap"
"math"
"sort"
)
type Bin struct {
Count int
Sum float64
}
func (b *Bin) Update(x *Bin) {
b.Count += x.Count
b.Sum += x.Sum
}
func (b *Bin) Mean() float64 {
return b.Sum / float64(b.Count)
}
type Bins []*Bin
func (bs Bins) Len() int { return len(bs) }
func (bs Bins) Less(i, j int) bool { return bs[i].Mean() < bs[j].Mean() }
func (bs Bins) Swap(i, j int) { bs[i], bs[j] = bs[j], bs[i] }
func (bs *Bins) Push(x interface{}) {
*bs = append(*bs, x.(*Bin))
}
func (bs *Bins) Pop() interface{} {
return bs.remove(len(*bs) - 1)
}
func (bs *Bins) remove(n int) *Bin {
if n < 0 || len(*bs) < n {
return nil
}
x := (*bs)[n]
*bs = append((*bs)[:n], (*bs)[n+1:]...)
return x
}
type Histogram struct {
res *reservoir
}
func New(maxBins int) *Histogram {
return &Histogram{res: newReservoir(maxBins)}
}
func (h *Histogram) Insert(f float64) {
h.res.insert(&Bin{1, f})
h.res.compress()
}
func (h *Histogram) Bins() Bins {
return h.res.bins
}
type reservoir struct {
n int
maxBins int
bins Bins
}
func newReservoir(maxBins int) *reservoir {
return &reservoir{maxBins: maxBins}
}
func (r *reservoir) insert(bin *Bin) {
r.n += bin.Count
i := sort.Search(len(r.bins), func(i int) bool {
return r.bins[i].Mean() >= bin.Mean()
})
if i < 0 || i == r.bins.Len() {
// TODO(blake): Maybe use an .insert(i, bin) instead of
// performing the extra work of a heap.Push.
heap.Push(&r.bins, bin)
return
}
r.bins[i].Update(bin)
}
func (r *reservoir) compress() {
for r.bins.Len() > r.maxBins {
minGapIndex := -1
minGap := math.MaxFloat64
for i := 0; i < r.bins.Len()-1; i++ {
gap := gapWeight(r.bins[i], r.bins[i+1])
if minGap > gap {
minGap = gap
minGapIndex = i
}
}
prev := r.bins[minGapIndex]
next := r.bins.remove(minGapIndex + 1)
prev.Update(next)
}
}
func gapWeight(prev, next *Bin) float64 {
return next.Mean() - prev.Mean()
}

View file

@ -1,38 +0,0 @@
package histogram
import (
"math/rand"
"testing"
)
func TestHistogram(t *testing.T) {
const numPoints = 1e6
const maxBins = 3
h := New(maxBins)
for i := 0; i < numPoints; i++ {
f := rand.ExpFloat64()
h.Insert(f)
}
bins := h.Bins()
if g := len(bins); g > maxBins {
t.Fatalf("got %d bins, wanted <= %d", g, maxBins)
}
for _, b := range bins {
t.Logf("%+v", b)
}
if g := count(h.Bins()); g != numPoints {
t.Fatalf("binned %d points, wanted %d", g, numPoints)
}
}
func count(bins Bins) int {
binCounts := 0
for _, b := range bins {
binCounts += b.Count
}
return binCounts
}

View file

@ -1,90 +0,0 @@
package topk
import (
"sort"
)
// http://www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf
type Element struct {
Value string
Count int
}
type Samples []*Element
func (sm Samples) Len() int {
return len(sm)
}
func (sm Samples) Less(i, j int) bool {
return sm[i].Count < sm[j].Count
}
func (sm Samples) Swap(i, j int) {
sm[i], sm[j] = sm[j], sm[i]
}
type Stream struct {
k int
mon map[string]*Element
// the minimum Element
min *Element
}
func New(k int) *Stream {
s := new(Stream)
s.k = k
s.mon = make(map[string]*Element)
s.min = &Element{}
// Track k+1 so that less frequenet items contended for that spot,
// resulting in k being more accurate.
return s
}
func (s *Stream) Insert(x string) {
s.insert(&Element{x, 1})
}
func (s *Stream) Merge(sm Samples) {
for _, e := range sm {
s.insert(e)
}
}
func (s *Stream) insert(in *Element) {
e := s.mon[in.Value]
if e != nil {
e.Count++
} else {
if len(s.mon) < s.k+1 {
e = &Element{in.Value, in.Count}
s.mon[in.Value] = e
} else {
e = s.min
delete(s.mon, e.Value)
e.Value = in.Value
e.Count += in.Count
s.min = e
}
}
if e.Count < s.min.Count {
s.min = e
}
}
func (s *Stream) Query() Samples {
var sm Samples
for _, e := range s.mon {
sm = append(sm, e)
}
sort.Sort(sort.Reverse(sm))
if len(sm) < s.k {
return sm
}
return sm[:s.k]
}

View file

@ -1,57 +0,0 @@
package topk
import (
"fmt"
"math/rand"
"sort"
"testing"
)
func TestTopK(t *testing.T) {
stream := New(10)
ss := []*Stream{New(10), New(10), New(10)}
m := make(map[string]int)
for _, s := range ss {
for i := 0; i < 1e6; i++ {
v := fmt.Sprintf("%x", int8(rand.ExpFloat64()))
s.Insert(v)
m[v]++
}
stream.Merge(s.Query())
}
var sm Samples
for x, s := range m {
sm = append(sm, &Element{x, s})
}
sort.Sort(sort.Reverse(sm))
g := stream.Query()
if len(g) != 10 {
t.Fatalf("got %d, want 10", len(g))
}
for i, e := range g {
if sm[i].Value != e.Value {
t.Errorf("at %d: want %q, got %q", i, sm[i].Value, e.Value)
}
}
}
func TestQuery(t *testing.T) {
queryTests := []struct {
value string
expected int
}{
{"a", 1},
{"b", 2},
{"c", 2},
}
stream := New(2)
for _, tt := range queryTests {
stream.Insert(tt.value)
if n := len(stream.Query()); n != tt.expected {
t.Errorf("want %d, got %d", tt.expected, n)
}
}
}