Update go dependencies for kubernetes 1.16.0
This commit is contained in:
parent
280920980d
commit
d7b530cb0a
510 changed files with 107206 additions and 52551 deletions
712
vendor/golang.org/x/net/html/atom/gen.go
generated
vendored
712
vendor/golang.org/x/net/html/atom/gen.go
generated
vendored
|
|
@ -1,712 +0,0 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
//go:generate go run gen.go
|
||||
//go:generate go run gen.go -test
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"go/format"
|
||||
"io/ioutil"
|
||||
"math/rand"
|
||||
"os"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// identifier converts s to a Go exported identifier.
|
||||
// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
|
||||
func identifier(s string) string {
|
||||
b := make([]byte, 0, len(s))
|
||||
cap := true
|
||||
for _, c := range s {
|
||||
if c == '-' {
|
||||
cap = true
|
||||
continue
|
||||
}
|
||||
if cap && 'a' <= c && c <= 'z' {
|
||||
c -= 'a' - 'A'
|
||||
}
|
||||
cap = false
|
||||
b = append(b, byte(c))
|
||||
}
|
||||
return string(b)
|
||||
}
|
||||
|
||||
var test = flag.Bool("test", false, "generate table_test.go")
|
||||
|
||||
func genFile(name string, buf *bytes.Buffer) {
|
||||
b, err := format.Source(buf.Bytes())
|
||||
if err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
if err := ioutil.WriteFile(name, b, 0644); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
var all []string
|
||||
all = append(all, elements...)
|
||||
all = append(all, attributes...)
|
||||
all = append(all, eventHandlers...)
|
||||
all = append(all, extra...)
|
||||
sort.Strings(all)
|
||||
|
||||
// uniq - lists have dups
|
||||
w := 0
|
||||
for _, s := range all {
|
||||
if w == 0 || all[w-1] != s {
|
||||
all[w] = s
|
||||
w++
|
||||
}
|
||||
}
|
||||
all = all[:w]
|
||||
|
||||
if *test {
|
||||
var buf bytes.Buffer
|
||||
fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
|
||||
fmt.Fprintln(&buf, "//go:generate go run gen.go -test\n")
|
||||
fmt.Fprintln(&buf, "package atom\n")
|
||||
fmt.Fprintln(&buf, "var testAtomList = []string{")
|
||||
for _, s := range all {
|
||||
fmt.Fprintf(&buf, "\t%q,\n", s)
|
||||
}
|
||||
fmt.Fprintln(&buf, "}")
|
||||
|
||||
genFile("table_test.go", &buf)
|
||||
return
|
||||
}
|
||||
|
||||
// Find hash that minimizes table size.
|
||||
var best *table
|
||||
for i := 0; i < 1000000; i++ {
|
||||
if best != nil && 1<<(best.k-1) < len(all) {
|
||||
break
|
||||
}
|
||||
h := rand.Uint32()
|
||||
for k := uint(0); k <= 16; k++ {
|
||||
if best != nil && k >= best.k {
|
||||
break
|
||||
}
|
||||
var t table
|
||||
if t.init(h, k, all) {
|
||||
best = &t
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if best == nil {
|
||||
fmt.Fprintf(os.Stderr, "failed to construct string table\n")
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
// Lay out strings, using overlaps when possible.
|
||||
layout := append([]string{}, all...)
|
||||
|
||||
// Remove strings that are substrings of other strings
|
||||
for changed := true; changed; {
|
||||
changed = false
|
||||
for i, s := range layout {
|
||||
if s == "" {
|
||||
continue
|
||||
}
|
||||
for j, t := range layout {
|
||||
if i != j && t != "" && strings.Contains(s, t) {
|
||||
changed = true
|
||||
layout[j] = ""
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Join strings where one suffix matches another prefix.
|
||||
for {
|
||||
// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
|
||||
// maximizing overlap length k.
|
||||
besti := -1
|
||||
bestj := -1
|
||||
bestk := 0
|
||||
for i, s := range layout {
|
||||
if s == "" {
|
||||
continue
|
||||
}
|
||||
for j, t := range layout {
|
||||
if i == j {
|
||||
continue
|
||||
}
|
||||
for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
|
||||
if s[len(s)-k:] == t[:k] {
|
||||
besti = i
|
||||
bestj = j
|
||||
bestk = k
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if bestk > 0 {
|
||||
layout[besti] += layout[bestj][bestk:]
|
||||
layout[bestj] = ""
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
text := strings.Join(layout, "")
|
||||
|
||||
atom := map[string]uint32{}
|
||||
for _, s := range all {
|
||||
off := strings.Index(text, s)
|
||||
if off < 0 {
|
||||
panic("lost string " + s)
|
||||
}
|
||||
atom[s] = uint32(off<<8 | len(s))
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
// Generate the Go code.
|
||||
fmt.Fprintln(&buf, "// Code generated by go generate gen.go; DO NOT EDIT.\n")
|
||||
fmt.Fprintln(&buf, "//go:generate go run gen.go\n")
|
||||
fmt.Fprintln(&buf, "package atom\n\nconst (")
|
||||
|
||||
// compute max len
|
||||
maxLen := 0
|
||||
for _, s := range all {
|
||||
if maxLen < len(s) {
|
||||
maxLen = len(s)
|
||||
}
|
||||
fmt.Fprintf(&buf, "\t%s Atom = %#x\n", identifier(s), atom[s])
|
||||
}
|
||||
fmt.Fprintln(&buf, ")\n")
|
||||
|
||||
fmt.Fprintf(&buf, "const hash0 = %#x\n\n", best.h0)
|
||||
fmt.Fprintf(&buf, "const maxAtomLen = %d\n\n", maxLen)
|
||||
|
||||
fmt.Fprintf(&buf, "var table = [1<<%d]Atom{\n", best.k)
|
||||
for i, s := range best.tab {
|
||||
if s == "" {
|
||||
continue
|
||||
}
|
||||
fmt.Fprintf(&buf, "\t%#x: %#x, // %s\n", i, atom[s], s)
|
||||
}
|
||||
fmt.Fprintf(&buf, "}\n")
|
||||
datasize := (1 << best.k) * 4
|
||||
|
||||
fmt.Fprintln(&buf, "const atomText =")
|
||||
textsize := len(text)
|
||||
for len(text) > 60 {
|
||||
fmt.Fprintf(&buf, "\t%q +\n", text[:60])
|
||||
text = text[60:]
|
||||
}
|
||||
fmt.Fprintf(&buf, "\t%q\n\n", text)
|
||||
|
||||
genFile("table.go", &buf)
|
||||
|
||||
fmt.Fprintf(os.Stdout, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
|
||||
}
|
||||
|
||||
type byLen []string
|
||||
|
||||
func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
|
||||
func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
||||
func (x byLen) Len() int { return len(x) }
|
||||
|
||||
// fnv computes the FNV hash with an arbitrary starting value h.
|
||||
func fnv(h uint32, s string) uint32 {
|
||||
for i := 0; i < len(s); i++ {
|
||||
h ^= uint32(s[i])
|
||||
h *= 16777619
|
||||
}
|
||||
return h
|
||||
}
|
||||
|
||||
// A table represents an attempt at constructing the lookup table.
|
||||
// The lookup table uses cuckoo hashing, meaning that each string
|
||||
// can be found in one of two positions.
|
||||
type table struct {
|
||||
h0 uint32
|
||||
k uint
|
||||
mask uint32
|
||||
tab []string
|
||||
}
|
||||
|
||||
// hash returns the two hashes for s.
|
||||
func (t *table) hash(s string) (h1, h2 uint32) {
|
||||
h := fnv(t.h0, s)
|
||||
h1 = h & t.mask
|
||||
h2 = (h >> 16) & t.mask
|
||||
return
|
||||
}
|
||||
|
||||
// init initializes the table with the given parameters.
|
||||
// h0 is the initial hash value,
|
||||
// k is the number of bits of hash value to use, and
|
||||
// x is the list of strings to store in the table.
|
||||
// init returns false if the table cannot be constructed.
|
||||
func (t *table) init(h0 uint32, k uint, x []string) bool {
|
||||
t.h0 = h0
|
||||
t.k = k
|
||||
t.tab = make([]string, 1<<k)
|
||||
t.mask = 1<<k - 1
|
||||
for _, s := range x {
|
||||
if !t.insert(s) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// insert inserts s in the table.
|
||||
func (t *table) insert(s string) bool {
|
||||
h1, h2 := t.hash(s)
|
||||
if t.tab[h1] == "" {
|
||||
t.tab[h1] = s
|
||||
return true
|
||||
}
|
||||
if t.tab[h2] == "" {
|
||||
t.tab[h2] = s
|
||||
return true
|
||||
}
|
||||
if t.push(h1, 0) {
|
||||
t.tab[h1] = s
|
||||
return true
|
||||
}
|
||||
if t.push(h2, 0) {
|
||||
t.tab[h2] = s
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// push attempts to push aside the entry in slot i.
|
||||
func (t *table) push(i uint32, depth int) bool {
|
||||
if depth > len(t.tab) {
|
||||
return false
|
||||
}
|
||||
s := t.tab[i]
|
||||
h1, h2 := t.hash(s)
|
||||
j := h1 + h2 - i
|
||||
if t.tab[j] != "" && !t.push(j, depth+1) {
|
||||
return false
|
||||
}
|
||||
t.tab[j] = s
|
||||
return true
|
||||
}
|
||||
|
||||
// The lists of element names and attribute keys were taken from
|
||||
// https://html.spec.whatwg.org/multipage/indices.html#index
|
||||
// as of the "HTML Living Standard - Last Updated 16 April 2018" version.
|
||||
|
||||
// "command", "keygen" and "menuitem" have been removed from the spec,
|
||||
// but are kept here for backwards compatibility.
|
||||
var elements = []string{
|
||||
"a",
|
||||
"abbr",
|
||||
"address",
|
||||
"area",
|
||||
"article",
|
||||
"aside",
|
||||
"audio",
|
||||
"b",
|
||||
"base",
|
||||
"bdi",
|
||||
"bdo",
|
||||
"blockquote",
|
||||
"body",
|
||||
"br",
|
||||
"button",
|
||||
"canvas",
|
||||
"caption",
|
||||
"cite",
|
||||
"code",
|
||||
"col",
|
||||
"colgroup",
|
||||
"command",
|
||||
"data",
|
||||
"datalist",
|
||||
"dd",
|
||||
"del",
|
||||
"details",
|
||||
"dfn",
|
||||
"dialog",
|
||||
"div",
|
||||
"dl",
|
||||
"dt",
|
||||
"em",
|
||||
"embed",
|
||||
"fieldset",
|
||||
"figcaption",
|
||||
"figure",
|
||||
"footer",
|
||||
"form",
|
||||
"h1",
|
||||
"h2",
|
||||
"h3",
|
||||
"h4",
|
||||
"h5",
|
||||
"h6",
|
||||
"head",
|
||||
"header",
|
||||
"hgroup",
|
||||
"hr",
|
||||
"html",
|
||||
"i",
|
||||
"iframe",
|
||||
"img",
|
||||
"input",
|
||||
"ins",
|
||||
"kbd",
|
||||
"keygen",
|
||||
"label",
|
||||
"legend",
|
||||
"li",
|
||||
"link",
|
||||
"main",
|
||||
"map",
|
||||
"mark",
|
||||
"menu",
|
||||
"menuitem",
|
||||
"meta",
|
||||
"meter",
|
||||
"nav",
|
||||
"noscript",
|
||||
"object",
|
||||
"ol",
|
||||
"optgroup",
|
||||
"option",
|
||||
"output",
|
||||
"p",
|
||||
"param",
|
||||
"picture",
|
||||
"pre",
|
||||
"progress",
|
||||
"q",
|
||||
"rp",
|
||||
"rt",
|
||||
"ruby",
|
||||
"s",
|
||||
"samp",
|
||||
"script",
|
||||
"section",
|
||||
"select",
|
||||
"slot",
|
||||
"small",
|
||||
"source",
|
||||
"span",
|
||||
"strong",
|
||||
"style",
|
||||
"sub",
|
||||
"summary",
|
||||
"sup",
|
||||
"table",
|
||||
"tbody",
|
||||
"td",
|
||||
"template",
|
||||
"textarea",
|
||||
"tfoot",
|
||||
"th",
|
||||
"thead",
|
||||
"time",
|
||||
"title",
|
||||
"tr",
|
||||
"track",
|
||||
"u",
|
||||
"ul",
|
||||
"var",
|
||||
"video",
|
||||
"wbr",
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/indices.html#attributes-3
|
||||
//
|
||||
// "challenge", "command", "contextmenu", "dropzone", "icon", "keytype", "mediagroup",
|
||||
// "radiogroup", "spellcheck", "scoped", "seamless", "sortable" and "sorted" have been removed from the spec,
|
||||
// but are kept here for backwards compatibility.
|
||||
var attributes = []string{
|
||||
"abbr",
|
||||
"accept",
|
||||
"accept-charset",
|
||||
"accesskey",
|
||||
"action",
|
||||
"allowfullscreen",
|
||||
"allowpaymentrequest",
|
||||
"allowusermedia",
|
||||
"alt",
|
||||
"as",
|
||||
"async",
|
||||
"autocomplete",
|
||||
"autofocus",
|
||||
"autoplay",
|
||||
"challenge",
|
||||
"charset",
|
||||
"checked",
|
||||
"cite",
|
||||
"class",
|
||||
"color",
|
||||
"cols",
|
||||
"colspan",
|
||||
"command",
|
||||
"content",
|
||||
"contenteditable",
|
||||
"contextmenu",
|
||||
"controls",
|
||||
"coords",
|
||||
"crossorigin",
|
||||
"data",
|
||||
"datetime",
|
||||
"default",
|
||||
"defer",
|
||||
"dir",
|
||||
"dirname",
|
||||
"disabled",
|
||||
"download",
|
||||
"draggable",
|
||||
"dropzone",
|
||||
"enctype",
|
||||
"for",
|
||||
"form",
|
||||
"formaction",
|
||||
"formenctype",
|
||||
"formmethod",
|
||||
"formnovalidate",
|
||||
"formtarget",
|
||||
"headers",
|
||||
"height",
|
||||
"hidden",
|
||||
"high",
|
||||
"href",
|
||||
"hreflang",
|
||||
"http-equiv",
|
||||
"icon",
|
||||
"id",
|
||||
"inputmode",
|
||||
"integrity",
|
||||
"is",
|
||||
"ismap",
|
||||
"itemid",
|
||||
"itemprop",
|
||||
"itemref",
|
||||
"itemscope",
|
||||
"itemtype",
|
||||
"keytype",
|
||||
"kind",
|
||||
"label",
|
||||
"lang",
|
||||
"list",
|
||||
"loop",
|
||||
"low",
|
||||
"manifest",
|
||||
"max",
|
||||
"maxlength",
|
||||
"media",
|
||||
"mediagroup",
|
||||
"method",
|
||||
"min",
|
||||
"minlength",
|
||||
"multiple",
|
||||
"muted",
|
||||
"name",
|
||||
"nomodule",
|
||||
"nonce",
|
||||
"novalidate",
|
||||
"open",
|
||||
"optimum",
|
||||
"pattern",
|
||||
"ping",
|
||||
"placeholder",
|
||||
"playsinline",
|
||||
"poster",
|
||||
"preload",
|
||||
"radiogroup",
|
||||
"readonly",
|
||||
"referrerpolicy",
|
||||
"rel",
|
||||
"required",
|
||||
"reversed",
|
||||
"rows",
|
||||
"rowspan",
|
||||
"sandbox",
|
||||
"spellcheck",
|
||||
"scope",
|
||||
"scoped",
|
||||
"seamless",
|
||||
"selected",
|
||||
"shape",
|
||||
"size",
|
||||
"sizes",
|
||||
"sortable",
|
||||
"sorted",
|
||||
"slot",
|
||||
"span",
|
||||
"spellcheck",
|
||||
"src",
|
||||
"srcdoc",
|
||||
"srclang",
|
||||
"srcset",
|
||||
"start",
|
||||
"step",
|
||||
"style",
|
||||
"tabindex",
|
||||
"target",
|
||||
"title",
|
||||
"translate",
|
||||
"type",
|
||||
"typemustmatch",
|
||||
"updateviacache",
|
||||
"usemap",
|
||||
"value",
|
||||
"width",
|
||||
"workertype",
|
||||
"wrap",
|
||||
}
|
||||
|
||||
// "onautocomplete", "onautocompleteerror", "onmousewheel",
|
||||
// "onshow" and "onsort" have been removed from the spec,
|
||||
// but are kept here for backwards compatibility.
|
||||
var eventHandlers = []string{
|
||||
"onabort",
|
||||
"onautocomplete",
|
||||
"onautocompleteerror",
|
||||
"onauxclick",
|
||||
"onafterprint",
|
||||
"onbeforeprint",
|
||||
"onbeforeunload",
|
||||
"onblur",
|
||||
"oncancel",
|
||||
"oncanplay",
|
||||
"oncanplaythrough",
|
||||
"onchange",
|
||||
"onclick",
|
||||
"onclose",
|
||||
"oncontextmenu",
|
||||
"oncopy",
|
||||
"oncuechange",
|
||||
"oncut",
|
||||
"ondblclick",
|
||||
"ondrag",
|
||||
"ondragend",
|
||||
"ondragenter",
|
||||
"ondragexit",
|
||||
"ondragleave",
|
||||
"ondragover",
|
||||
"ondragstart",
|
||||
"ondrop",
|
||||
"ondurationchange",
|
||||
"onemptied",
|
||||
"onended",
|
||||
"onerror",
|
||||
"onfocus",
|
||||
"onhashchange",
|
||||
"oninput",
|
||||
"oninvalid",
|
||||
"onkeydown",
|
||||
"onkeypress",
|
||||
"onkeyup",
|
||||
"onlanguagechange",
|
||||
"onload",
|
||||
"onloadeddata",
|
||||
"onloadedmetadata",
|
||||
"onloadend",
|
||||
"onloadstart",
|
||||
"onmessage",
|
||||
"onmessageerror",
|
||||
"onmousedown",
|
||||
"onmouseenter",
|
||||
"onmouseleave",
|
||||
"onmousemove",
|
||||
"onmouseout",
|
||||
"onmouseover",
|
||||
"onmouseup",
|
||||
"onmousewheel",
|
||||
"onwheel",
|
||||
"onoffline",
|
||||
"ononline",
|
||||
"onpagehide",
|
||||
"onpageshow",
|
||||
"onpaste",
|
||||
"onpause",
|
||||
"onplay",
|
||||
"onplaying",
|
||||
"onpopstate",
|
||||
"onprogress",
|
||||
"onratechange",
|
||||
"onreset",
|
||||
"onresize",
|
||||
"onrejectionhandled",
|
||||
"onscroll",
|
||||
"onsecuritypolicyviolation",
|
||||
"onseeked",
|
||||
"onseeking",
|
||||
"onselect",
|
||||
"onshow",
|
||||
"onsort",
|
||||
"onstalled",
|
||||
"onstorage",
|
||||
"onsubmit",
|
||||
"onsuspend",
|
||||
"ontimeupdate",
|
||||
"ontoggle",
|
||||
"onunhandledrejection",
|
||||
"onunload",
|
||||
"onvolumechange",
|
||||
"onwaiting",
|
||||
}
|
||||
|
||||
// extra are ad-hoc values not covered by any of the lists above.
|
||||
var extra = []string{
|
||||
"acronym",
|
||||
"align",
|
||||
"annotation",
|
||||
"annotation-xml",
|
||||
"applet",
|
||||
"basefont",
|
||||
"bgsound",
|
||||
"big",
|
||||
"blink",
|
||||
"center",
|
||||
"color",
|
||||
"desc",
|
||||
"face",
|
||||
"font",
|
||||
"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
|
||||
"foreignobject",
|
||||
"frame",
|
||||
"frameset",
|
||||
"image",
|
||||
"isindex",
|
||||
"listing",
|
||||
"malignmark",
|
||||
"marquee",
|
||||
"math",
|
||||
"mglyph",
|
||||
"mi",
|
||||
"mn",
|
||||
"mo",
|
||||
"ms",
|
||||
"mtext",
|
||||
"nobr",
|
||||
"noembed",
|
||||
"noframes",
|
||||
"plaintext",
|
||||
"prompt",
|
||||
"public",
|
||||
"rb",
|
||||
"rtc",
|
||||
"spacer",
|
||||
"strike",
|
||||
"svg",
|
||||
"system",
|
||||
"tt",
|
||||
"xmp",
|
||||
}
|
||||
717
vendor/golang.org/x/net/publicsuffix/gen.go
generated
vendored
717
vendor/golang.org/x/net/publicsuffix/gen.go
generated
vendored
|
|
@ -1,717 +0,0 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates table.go and table_test.go based on the authoritative
|
||||
// public suffix list at https://publicsuffix.org/list/effective_tld_names.dat
|
||||
//
|
||||
// The version is derived from
|
||||
// https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat
|
||||
// and a human-readable form is at
|
||||
// https://github.com/publicsuffix/list/commits/master/public_suffix_list.dat
|
||||
//
|
||||
// To fetch a particular git revision, such as 5c70ccd250, pass
|
||||
// -url "https://raw.githubusercontent.com/publicsuffix/list/5c70ccd250/public_suffix_list.dat"
|
||||
// and -version "an explicit version string".
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"flag"
|
||||
"fmt"
|
||||
"go/format"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/idna"
|
||||
)
|
||||
|
||||
const (
|
||||
// These sum of these four values must be no greater than 32.
|
||||
nodesBitsChildren = 10
|
||||
nodesBitsICANN = 1
|
||||
nodesBitsTextOffset = 15
|
||||
nodesBitsTextLength = 6
|
||||
|
||||
// These sum of these four values must be no greater than 32.
|
||||
childrenBitsWildcard = 1
|
||||
childrenBitsNodeType = 2
|
||||
childrenBitsHi = 14
|
||||
childrenBitsLo = 14
|
||||
)
|
||||
|
||||
var (
|
||||
maxChildren int
|
||||
maxTextOffset int
|
||||
maxTextLength int
|
||||
maxHi uint32
|
||||
maxLo uint32
|
||||
)
|
||||
|
||||
func max(a, b int) int {
|
||||
if a < b {
|
||||
return b
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
func u32max(a, b uint32) uint32 {
|
||||
if a < b {
|
||||
return b
|
||||
}
|
||||
return a
|
||||
}
|
||||
|
||||
const (
|
||||
nodeTypeNormal = 0
|
||||
nodeTypeException = 1
|
||||
nodeTypeParentOnly = 2
|
||||
numNodeType = 3
|
||||
)
|
||||
|
||||
func nodeTypeStr(n int) string {
|
||||
switch n {
|
||||
case nodeTypeNormal:
|
||||
return "+"
|
||||
case nodeTypeException:
|
||||
return "!"
|
||||
case nodeTypeParentOnly:
|
||||
return "o"
|
||||
}
|
||||
panic("unreachable")
|
||||
}
|
||||
|
||||
const (
|
||||
defaultURL = "https://publicsuffix.org/list/effective_tld_names.dat"
|
||||
gitCommitURL = "https://api.github.com/repos/publicsuffix/list/commits?path=public_suffix_list.dat"
|
||||
)
|
||||
|
||||
var (
|
||||
labelEncoding = map[string]uint32{}
|
||||
labelsList = []string{}
|
||||
labelsMap = map[string]bool{}
|
||||
rules = []string{}
|
||||
numICANNRules = 0
|
||||
|
||||
// validSuffixRE is used to check that the entries in the public suffix
|
||||
// list are in canonical form (after Punycode encoding). Specifically,
|
||||
// capital letters are not allowed.
|
||||
validSuffixRE = regexp.MustCompile(`^[a-z0-9_\!\*\-\.]+$`)
|
||||
|
||||
shaRE = regexp.MustCompile(`"sha":"([^"]+)"`)
|
||||
dateRE = regexp.MustCompile(`"committer":{[^{]+"date":"([^"]+)"`)
|
||||
|
||||
comments = flag.Bool("comments", false, "generate table.go comments, for debugging")
|
||||
subset = flag.Bool("subset", false, "generate only a subset of the full table, for debugging")
|
||||
url = flag.String("url", defaultURL, "URL of the publicsuffix.org list. If empty, stdin is read instead")
|
||||
v = flag.Bool("v", false, "verbose output (to stderr)")
|
||||
version = flag.String("version", "", "the effective_tld_names.dat version")
|
||||
)
|
||||
|
||||
func main() {
|
||||
if err := main1(); err != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func main1() error {
|
||||
flag.Parse()
|
||||
if nodesBitsTextLength+nodesBitsTextOffset+nodesBitsICANN+nodesBitsChildren > 32 {
|
||||
return fmt.Errorf("not enough bits to encode the nodes table")
|
||||
}
|
||||
if childrenBitsLo+childrenBitsHi+childrenBitsNodeType+childrenBitsWildcard > 32 {
|
||||
return fmt.Errorf("not enough bits to encode the children table")
|
||||
}
|
||||
if *version == "" {
|
||||
if *url != defaultURL {
|
||||
return fmt.Errorf("-version was not specified, and the -url is not the default one")
|
||||
}
|
||||
sha, date, err := gitCommit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
*version = fmt.Sprintf("publicsuffix.org's public_suffix_list.dat, git revision %s (%s)", sha, date)
|
||||
}
|
||||
var r io.Reader = os.Stdin
|
||||
if *url != "" {
|
||||
res, err := http.Get(*url)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return fmt.Errorf("bad GET status for %s: %d", *url, res.Status)
|
||||
}
|
||||
r = res.Body
|
||||
defer res.Body.Close()
|
||||
}
|
||||
|
||||
var root node
|
||||
icann := false
|
||||
br := bufio.NewReader(r)
|
||||
for {
|
||||
s, err := br.ReadString('\n')
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
s = strings.TrimSpace(s)
|
||||
if strings.Contains(s, "BEGIN ICANN DOMAINS") {
|
||||
if len(rules) != 0 {
|
||||
return fmt.Errorf(`expected no rules before "BEGIN ICANN DOMAINS"`)
|
||||
}
|
||||
icann = true
|
||||
continue
|
||||
}
|
||||
if strings.Contains(s, "END ICANN DOMAINS") {
|
||||
icann, numICANNRules = false, len(rules)
|
||||
continue
|
||||
}
|
||||
if s == "" || strings.HasPrefix(s, "//") {
|
||||
continue
|
||||
}
|
||||
s, err = idna.ToASCII(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !validSuffixRE.MatchString(s) {
|
||||
return fmt.Errorf("bad publicsuffix.org list data: %q", s)
|
||||
}
|
||||
|
||||
if *subset {
|
||||
switch {
|
||||
case s == "ac.jp" || strings.HasSuffix(s, ".ac.jp"):
|
||||
case s == "ak.us" || strings.HasSuffix(s, ".ak.us"):
|
||||
case s == "ao" || strings.HasSuffix(s, ".ao"):
|
||||
case s == "ar" || strings.HasSuffix(s, ".ar"):
|
||||
case s == "arpa" || strings.HasSuffix(s, ".arpa"):
|
||||
case s == "cy" || strings.HasSuffix(s, ".cy"):
|
||||
case s == "dyndns.org" || strings.HasSuffix(s, ".dyndns.org"):
|
||||
case s == "jp":
|
||||
case s == "kobe.jp" || strings.HasSuffix(s, ".kobe.jp"):
|
||||
case s == "kyoto.jp" || strings.HasSuffix(s, ".kyoto.jp"):
|
||||
case s == "om" || strings.HasSuffix(s, ".om"):
|
||||
case s == "uk" || strings.HasSuffix(s, ".uk"):
|
||||
case s == "uk.com" || strings.HasSuffix(s, ".uk.com"):
|
||||
case s == "tw" || strings.HasSuffix(s, ".tw"):
|
||||
case s == "zw" || strings.HasSuffix(s, ".zw"):
|
||||
case s == "xn--p1ai" || strings.HasSuffix(s, ".xn--p1ai"):
|
||||
// xn--p1ai is Russian-Cyrillic "рф".
|
||||
default:
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
rules = append(rules, s)
|
||||
|
||||
nt, wildcard := nodeTypeNormal, false
|
||||
switch {
|
||||
case strings.HasPrefix(s, "*."):
|
||||
s, nt = s[2:], nodeTypeParentOnly
|
||||
wildcard = true
|
||||
case strings.HasPrefix(s, "!"):
|
||||
s, nt = s[1:], nodeTypeException
|
||||
}
|
||||
labels := strings.Split(s, ".")
|
||||
for n, i := &root, len(labels)-1; i >= 0; i-- {
|
||||
label := labels[i]
|
||||
n = n.child(label)
|
||||
if i == 0 {
|
||||
if nt != nodeTypeParentOnly && n.nodeType == nodeTypeParentOnly {
|
||||
n.nodeType = nt
|
||||
}
|
||||
n.icann = n.icann && icann
|
||||
n.wildcard = n.wildcard || wildcard
|
||||
}
|
||||
labelsMap[label] = true
|
||||
}
|
||||
}
|
||||
labelsList = make([]string, 0, len(labelsMap))
|
||||
for label := range labelsMap {
|
||||
labelsList = append(labelsList, label)
|
||||
}
|
||||
sort.Strings(labelsList)
|
||||
|
||||
if err := generate(printReal, &root, "table.go"); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := generate(printTest, &root, "table_test.go"); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func generate(p func(io.Writer, *node) error, root *node, filename string) error {
|
||||
buf := new(bytes.Buffer)
|
||||
if err := p(buf, root); err != nil {
|
||||
return err
|
||||
}
|
||||
b, err := format.Source(buf.Bytes())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return ioutil.WriteFile(filename, b, 0644)
|
||||
}
|
||||
|
||||
func gitCommit() (sha, date string, retErr error) {
|
||||
res, err := http.Get(gitCommitURL)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return "", "", fmt.Errorf("bad GET status for %s: %d", gitCommitURL, res.Status)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
b, err := ioutil.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
if m := shaRE.FindSubmatch(b); m != nil {
|
||||
sha = string(m[1])
|
||||
}
|
||||
if m := dateRE.FindSubmatch(b); m != nil {
|
||||
date = string(m[1])
|
||||
}
|
||||
if sha == "" || date == "" {
|
||||
retErr = fmt.Errorf("could not find commit SHA and date in %s", gitCommitURL)
|
||||
}
|
||||
return sha, date, retErr
|
||||
}
|
||||
|
||||
func printTest(w io.Writer, n *node) error {
|
||||
fmt.Fprintf(w, "// generated by go run gen.go; DO NOT EDIT\n\n")
|
||||
fmt.Fprintf(w, "package publicsuffix\n\nconst numICANNRules = %d\n\nvar rules = [...]string{\n", numICANNRules)
|
||||
for _, rule := range rules {
|
||||
fmt.Fprintf(w, "%q,\n", rule)
|
||||
}
|
||||
fmt.Fprintf(w, "}\n\nvar nodeLabels = [...]string{\n")
|
||||
if err := n.walk(w, printNodeLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(w, "}\n")
|
||||
return nil
|
||||
}
|
||||
|
||||
func printReal(w io.Writer, n *node) error {
|
||||
const header = `// generated by go run gen.go; DO NOT EDIT
|
||||
|
||||
package publicsuffix
|
||||
|
||||
const version = %q
|
||||
|
||||
const (
|
||||
nodesBitsChildren = %d
|
||||
nodesBitsICANN = %d
|
||||
nodesBitsTextOffset = %d
|
||||
nodesBitsTextLength = %d
|
||||
|
||||
childrenBitsWildcard = %d
|
||||
childrenBitsNodeType = %d
|
||||
childrenBitsHi = %d
|
||||
childrenBitsLo = %d
|
||||
)
|
||||
|
||||
const (
|
||||
nodeTypeNormal = %d
|
||||
nodeTypeException = %d
|
||||
nodeTypeParentOnly = %d
|
||||
)
|
||||
|
||||
// numTLD is the number of top level domains.
|
||||
const numTLD = %d
|
||||
|
||||
`
|
||||
fmt.Fprintf(w, header, *version,
|
||||
nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength,
|
||||
childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo,
|
||||
nodeTypeNormal, nodeTypeException, nodeTypeParentOnly, len(n.children))
|
||||
|
||||
text := combineText(labelsList)
|
||||
if text == "" {
|
||||
return fmt.Errorf("internal error: makeText returned no text")
|
||||
}
|
||||
for _, label := range labelsList {
|
||||
offset, length := strings.Index(text, label), len(label)
|
||||
if offset < 0 {
|
||||
return fmt.Errorf("internal error: could not find %q in text %q", label, text)
|
||||
}
|
||||
maxTextOffset, maxTextLength = max(maxTextOffset, offset), max(maxTextLength, length)
|
||||
if offset >= 1<<nodesBitsTextOffset {
|
||||
return fmt.Errorf("text offset %d is too large, or nodeBitsTextOffset is too small", offset)
|
||||
}
|
||||
if length >= 1<<nodesBitsTextLength {
|
||||
return fmt.Errorf("text length %d is too large, or nodeBitsTextLength is too small", length)
|
||||
}
|
||||
labelEncoding[label] = uint32(offset)<<nodesBitsTextLength | uint32(length)
|
||||
}
|
||||
fmt.Fprintf(w, "// Text is the combined text of all labels.\nconst text = ")
|
||||
for len(text) > 0 {
|
||||
n, plus := len(text), ""
|
||||
if n > 64 {
|
||||
n, plus = 64, " +"
|
||||
}
|
||||
fmt.Fprintf(w, "%q%s\n", text[:n], plus)
|
||||
text = text[n:]
|
||||
}
|
||||
|
||||
if err := n.walk(w, assignIndexes); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fmt.Fprintf(w, `
|
||||
|
||||
// nodes is the list of nodes. Each node is represented as a uint32, which
|
||||
// encodes the node's children, wildcard bit and node type (as an index into
|
||||
// the children array), ICANN bit and text.
|
||||
//
|
||||
// If the table was generated with the -comments flag, there is a //-comment
|
||||
// after each node's data. In it is the nodes-array indexes of the children,
|
||||
// formatted as (n0x1234-n0x1256), with * denoting the wildcard bit. The
|
||||
// nodeType is printed as + for normal, ! for exception, and o for parent-only
|
||||
// nodes that have children but don't match a domain label in their own right.
|
||||
// An I denotes an ICANN domain.
|
||||
//
|
||||
// The layout within the uint32, from MSB to LSB, is:
|
||||
// [%2d bits] unused
|
||||
// [%2d bits] children index
|
||||
// [%2d bits] ICANN bit
|
||||
// [%2d bits] text index
|
||||
// [%2d bits] text length
|
||||
var nodes = [...]uint32{
|
||||
`,
|
||||
32-nodesBitsChildren-nodesBitsICANN-nodesBitsTextOffset-nodesBitsTextLength,
|
||||
nodesBitsChildren, nodesBitsICANN, nodesBitsTextOffset, nodesBitsTextLength)
|
||||
if err := n.walk(w, printNode); err != nil {
|
||||
return err
|
||||
}
|
||||
fmt.Fprintf(w, `}
|
||||
|
||||
// children is the list of nodes' children, the parent's wildcard bit and the
|
||||
// parent's node type. If a node has no children then their children index
|
||||
// will be in the range [0, 6), depending on the wildcard bit and node type.
|
||||
//
|
||||
// The layout within the uint32, from MSB to LSB, is:
|
||||
// [%2d bits] unused
|
||||
// [%2d bits] wildcard bit
|
||||
// [%2d bits] node type
|
||||
// [%2d bits] high nodes index (exclusive) of children
|
||||
// [%2d bits] low nodes index (inclusive) of children
|
||||
var children=[...]uint32{
|
||||
`,
|
||||
32-childrenBitsWildcard-childrenBitsNodeType-childrenBitsHi-childrenBitsLo,
|
||||
childrenBitsWildcard, childrenBitsNodeType, childrenBitsHi, childrenBitsLo)
|
||||
for i, c := range childrenEncoding {
|
||||
s := "---------------"
|
||||
lo := c & (1<<childrenBitsLo - 1)
|
||||
hi := (c >> childrenBitsLo) & (1<<childrenBitsHi - 1)
|
||||
if lo != hi {
|
||||
s = fmt.Sprintf("n0x%04x-n0x%04x", lo, hi)
|
||||
}
|
||||
nodeType := int(c>>(childrenBitsLo+childrenBitsHi)) & (1<<childrenBitsNodeType - 1)
|
||||
wildcard := c>>(childrenBitsLo+childrenBitsHi+childrenBitsNodeType) != 0
|
||||
if *comments {
|
||||
fmt.Fprintf(w, "0x%08x, // c0x%04x (%s)%s %s\n",
|
||||
c, i, s, wildcardStr(wildcard), nodeTypeStr(nodeType))
|
||||
} else {
|
||||
fmt.Fprintf(w, "0x%x,\n", c)
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(w, "}\n\n")
|
||||
fmt.Fprintf(w, "// max children %d (capacity %d)\n", maxChildren, 1<<nodesBitsChildren-1)
|
||||
fmt.Fprintf(w, "// max text offset %d (capacity %d)\n", maxTextOffset, 1<<nodesBitsTextOffset-1)
|
||||
fmt.Fprintf(w, "// max text length %d (capacity %d)\n", maxTextLength, 1<<nodesBitsTextLength-1)
|
||||
fmt.Fprintf(w, "// max hi %d (capacity %d)\n", maxHi, 1<<childrenBitsHi-1)
|
||||
fmt.Fprintf(w, "// max lo %d (capacity %d)\n", maxLo, 1<<childrenBitsLo-1)
|
||||
return nil
|
||||
}
|
||||
|
||||
type node struct {
|
||||
label string
|
||||
nodeType int
|
||||
icann bool
|
||||
wildcard bool
|
||||
// nodesIndex and childrenIndex are the index of this node in the nodes
|
||||
// and the index of its children offset/length in the children arrays.
|
||||
nodesIndex, childrenIndex int
|
||||
// firstChild is the index of this node's first child, or zero if this
|
||||
// node has no children.
|
||||
firstChild int
|
||||
// children are the node's children, in strictly increasing node label order.
|
||||
children []*node
|
||||
}
|
||||
|
||||
func (n *node) walk(w io.Writer, f func(w1 io.Writer, n1 *node) error) error {
|
||||
if err := f(w, n); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, c := range n.children {
|
||||
if err := c.walk(w, f); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// child returns the child of n with the given label. The child is created if
|
||||
// it did not exist beforehand.
|
||||
func (n *node) child(label string) *node {
|
||||
for _, c := range n.children {
|
||||
if c.label == label {
|
||||
return c
|
||||
}
|
||||
}
|
||||
c := &node{
|
||||
label: label,
|
||||
nodeType: nodeTypeParentOnly,
|
||||
icann: true,
|
||||
}
|
||||
n.children = append(n.children, c)
|
||||
sort.Sort(byLabel(n.children))
|
||||
return c
|
||||
}
|
||||
|
||||
type byLabel []*node
|
||||
|
||||
func (b byLabel) Len() int { return len(b) }
|
||||
func (b byLabel) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
||||
func (b byLabel) Less(i, j int) bool { return b[i].label < b[j].label }
|
||||
|
||||
var nextNodesIndex int
|
||||
|
||||
// childrenEncoding are the encoded entries in the generated children array.
|
||||
// All these pre-defined entries have no children.
|
||||
var childrenEncoding = []uint32{
|
||||
0 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeNormal.
|
||||
1 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeException.
|
||||
2 << (childrenBitsLo + childrenBitsHi), // Without wildcard bit, nodeTypeParentOnly.
|
||||
4 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeNormal.
|
||||
5 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeException.
|
||||
6 << (childrenBitsLo + childrenBitsHi), // With wildcard bit, nodeTypeParentOnly.
|
||||
}
|
||||
|
||||
var firstCallToAssignIndexes = true
|
||||
|
||||
func assignIndexes(w io.Writer, n *node) error {
|
||||
if len(n.children) != 0 {
|
||||
// Assign nodesIndex.
|
||||
n.firstChild = nextNodesIndex
|
||||
for _, c := range n.children {
|
||||
c.nodesIndex = nextNodesIndex
|
||||
nextNodesIndex++
|
||||
}
|
||||
|
||||
// The root node's children is implicit.
|
||||
if firstCallToAssignIndexes {
|
||||
firstCallToAssignIndexes = false
|
||||
return nil
|
||||
}
|
||||
|
||||
// Assign childrenIndex.
|
||||
maxChildren = max(maxChildren, len(childrenEncoding))
|
||||
if len(childrenEncoding) >= 1<<nodesBitsChildren {
|
||||
return fmt.Errorf("children table size %d is too large, or nodeBitsChildren is too small", len(childrenEncoding))
|
||||
}
|
||||
n.childrenIndex = len(childrenEncoding)
|
||||
lo := uint32(n.firstChild)
|
||||
hi := lo + uint32(len(n.children))
|
||||
maxLo, maxHi = u32max(maxLo, lo), u32max(maxHi, hi)
|
||||
if lo >= 1<<childrenBitsLo {
|
||||
return fmt.Errorf("children lo %d is too large, or childrenBitsLo is too small", lo)
|
||||
}
|
||||
if hi >= 1<<childrenBitsHi {
|
||||
return fmt.Errorf("children hi %d is too large, or childrenBitsHi is too small", hi)
|
||||
}
|
||||
enc := hi<<childrenBitsLo | lo
|
||||
enc |= uint32(n.nodeType) << (childrenBitsLo + childrenBitsHi)
|
||||
if n.wildcard {
|
||||
enc |= 1 << (childrenBitsLo + childrenBitsHi + childrenBitsNodeType)
|
||||
}
|
||||
childrenEncoding = append(childrenEncoding, enc)
|
||||
} else {
|
||||
n.childrenIndex = n.nodeType
|
||||
if n.wildcard {
|
||||
n.childrenIndex += numNodeType
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func printNode(w io.Writer, n *node) error {
|
||||
for _, c := range n.children {
|
||||
s := "---------------"
|
||||
if len(c.children) != 0 {
|
||||
s = fmt.Sprintf("n0x%04x-n0x%04x", c.firstChild, c.firstChild+len(c.children))
|
||||
}
|
||||
encoding := labelEncoding[c.label]
|
||||
if c.icann {
|
||||
encoding |= 1 << (nodesBitsTextLength + nodesBitsTextOffset)
|
||||
}
|
||||
encoding |= uint32(c.childrenIndex) << (nodesBitsTextLength + nodesBitsTextOffset + nodesBitsICANN)
|
||||
if *comments {
|
||||
fmt.Fprintf(w, "0x%08x, // n0x%04x c0x%04x (%s)%s %s %s %s\n",
|
||||
encoding, c.nodesIndex, c.childrenIndex, s, wildcardStr(c.wildcard),
|
||||
nodeTypeStr(c.nodeType), icannStr(c.icann), c.label,
|
||||
)
|
||||
} else {
|
||||
fmt.Fprintf(w, "0x%x,\n", encoding)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func printNodeLabel(w io.Writer, n *node) error {
|
||||
for _, c := range n.children {
|
||||
fmt.Fprintf(w, "%q,\n", c.label)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func icannStr(icann bool) string {
|
||||
if icann {
|
||||
return "I"
|
||||
}
|
||||
return " "
|
||||
}
|
||||
|
||||
func wildcardStr(wildcard bool) string {
|
||||
if wildcard {
|
||||
return "*"
|
||||
}
|
||||
return " "
|
||||
}
|
||||
|
||||
// combineText combines all the strings in labelsList to form one giant string.
|
||||
// Overlapping strings will be merged: "arpa" and "parliament" could yield
|
||||
// "arparliament".
|
||||
func combineText(labelsList []string) string {
|
||||
beforeLength := 0
|
||||
for _, s := range labelsList {
|
||||
beforeLength += len(s)
|
||||
}
|
||||
|
||||
text := crush(removeSubstrings(labelsList))
|
||||
if *v {
|
||||
fmt.Fprintf(os.Stderr, "crushed %d bytes to become %d bytes\n", beforeLength, len(text))
|
||||
}
|
||||
return text
|
||||
}
|
||||
|
||||
type byLength []string
|
||||
|
||||
func (s byLength) Len() int { return len(s) }
|
||||
func (s byLength) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
||||
func (s byLength) Less(i, j int) bool { return len(s[i]) < len(s[j]) }
|
||||
|
||||
// removeSubstrings returns a copy of its input with any strings removed
|
||||
// that are substrings of other provided strings.
|
||||
func removeSubstrings(input []string) []string {
|
||||
// Make a copy of input.
|
||||
ss := append(make([]string, 0, len(input)), input...)
|
||||
sort.Sort(byLength(ss))
|
||||
|
||||
for i, shortString := range ss {
|
||||
// For each string, only consider strings higher than it in sort order, i.e.
|
||||
// of equal length or greater.
|
||||
for _, longString := range ss[i+1:] {
|
||||
if strings.Contains(longString, shortString) {
|
||||
ss[i] = ""
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove the empty strings.
|
||||
sort.Strings(ss)
|
||||
for len(ss) > 0 && ss[0] == "" {
|
||||
ss = ss[1:]
|
||||
}
|
||||
return ss
|
||||
}
|
||||
|
||||
// crush combines a list of strings, taking advantage of overlaps. It returns a
|
||||
// single string that contains each input string as a substring.
|
||||
func crush(ss []string) string {
|
||||
maxLabelLen := 0
|
||||
for _, s := range ss {
|
||||
if maxLabelLen < len(s) {
|
||||
maxLabelLen = len(s)
|
||||
}
|
||||
}
|
||||
|
||||
for prefixLen := maxLabelLen; prefixLen > 0; prefixLen-- {
|
||||
prefixes := makePrefixMap(ss, prefixLen)
|
||||
for i, s := range ss {
|
||||
if len(s) <= prefixLen {
|
||||
continue
|
||||
}
|
||||
mergeLabel(ss, i, prefixLen, prefixes)
|
||||
}
|
||||
}
|
||||
|
||||
return strings.Join(ss, "")
|
||||
}
|
||||
|
||||
// mergeLabel merges the label at ss[i] with the first available matching label
|
||||
// in prefixMap, where the last "prefixLen" characters in ss[i] match the first
|
||||
// "prefixLen" characters in the matching label.
|
||||
// It will merge ss[i] repeatedly until no more matches are available.
|
||||
// All matching labels merged into ss[i] are replaced by "".
|
||||
func mergeLabel(ss []string, i, prefixLen int, prefixes prefixMap) {
|
||||
s := ss[i]
|
||||
suffix := s[len(s)-prefixLen:]
|
||||
for _, j := range prefixes[suffix] {
|
||||
// Empty strings mean "already used." Also avoid merging with self.
|
||||
if ss[j] == "" || i == j {
|
||||
continue
|
||||
}
|
||||
if *v {
|
||||
fmt.Fprintf(os.Stderr, "%d-length overlap at (%4d,%4d): %q and %q share %q\n",
|
||||
prefixLen, i, j, ss[i], ss[j], suffix)
|
||||
}
|
||||
ss[i] += ss[j][prefixLen:]
|
||||
ss[j] = ""
|
||||
// ss[i] has a new suffix, so merge again if possible.
|
||||
// Note: we only have to merge again at the same prefix length. Shorter
|
||||
// prefix lengths will be handled in the next iteration of crush's for loop.
|
||||
// Can there be matches for longer prefix lengths, introduced by the merge?
|
||||
// I believe that any such matches would by necessity have been eliminated
|
||||
// during substring removal or merged at a higher prefix length. For
|
||||
// instance, in crush("abc", "cde", "bcdef"), combining "abc" and "cde"
|
||||
// would yield "abcde", which could be merged with "bcdef." However, in
|
||||
// practice "cde" would already have been elimintated by removeSubstrings.
|
||||
mergeLabel(ss, i, prefixLen, prefixes)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// prefixMap maps from a prefix to a list of strings containing that prefix. The
|
||||
// list of strings is represented as indexes into a slice of strings stored
|
||||
// elsewhere.
|
||||
type prefixMap map[string][]int
|
||||
|
||||
// makePrefixMap constructs a prefixMap from a slice of strings.
|
||||
func makePrefixMap(ss []string, prefixLen int) prefixMap {
|
||||
prefixes := make(prefixMap)
|
||||
for i, s := range ss {
|
||||
// We use < rather than <= because if a label matches on a prefix equal to
|
||||
// its full length, that's actually a substring match handled by
|
||||
// removeSubstrings.
|
||||
if prefixLen < len(s) {
|
||||
prefix := s[:prefixLen]
|
||||
prefixes[prefix] = append(prefixes[prefix], i)
|
||||
}
|
||||
}
|
||||
|
||||
return prefixes
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue