Update go dependencies

This commit is contained in:
Manuel de Brito Fontes 2018-05-26 11:27:53 -04:00 committed by Manuel Alejandro de Brito Fontes
parent 15ffb51394
commit bb4d483837
No known key found for this signature in database
GPG key ID: 786136016A8BA02A
1621 changed files with 86368 additions and 284392 deletions

View file

@ -1,121 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab_test
// This file contains tests which need to import package collate, which causes
// an import cycle when done within package colltab itself.
import (
"bytes"
"testing"
"unicode"
"golang.org/x/text/collate"
"golang.org/x/text/language"
"golang.org/x/text/unicode/rangetable"
)
// assigned is used to only test runes that are inside the scope of the Unicode
// version used to generate the collation table.
var assigned = rangetable.Assigned(collate.UnicodeVersion)
// TestNonDigits checks that numbers which are not decimal digits are not
// collated as digits when numeric collation is enabled.
func TestNonDigits(t *testing.T) {
	coll := collate.New(language.English, collate.Loose, collate.Numeric)
	first := rune(unicode.N.R16[0].Lo)
	last := rune(unicode.N.R32[0].Hi)
	// Verify that all non-digit numbers sort outside of the number range.
	for r := first; r <= last; r++ {
		// Skip decimal digits and runes outside the tested Unicode version.
		if !unicode.In(r, assigned) || unicode.In(r, unicode.Nd) {
			continue
		}
		s := string(r)
		if coll.CompareString(s, "0") == -1 {
			continue
		}
		if coll.CompareString(s, "999999") == 1 {
			continue
		}
		t.Errorf("%+q non-digit number is collated as digit", s)
	}
}
// TestNumericCompare runs testDigitCompare over every range of decimal digits
// listed in unicode.Nd, 16-bit ranges first and 32-bit ranges second.
func TestNumericCompare(t *testing.T) {
	coll := collate.New(language.English, collate.Loose, collate.Numeric)

	// Iterate over all digits.
	for i := range unicode.Nd.R16 {
		rg := unicode.Nd.R16[i]
		testDigitCompare(t, coll, rune(rg.Lo), rune(rg.Hi))
	}
	for i := range unicode.Nd.R32 {
		rg := unicode.Nd.R32[i]
		testDigitCompare(t, coll, rune(rg.Lo), rune(rg.Hi))
	}
}
// testDigitCompare checks numeric comparison for the rune range [zero, nine].
// For each prefix in the table below, the prefix followed by a rune from the
// range is compared against eleven ASCII number strings; the collator must
// report the same ordering as the plain integers would have.
//
// The range is skipped when zero is not in the assigned table, and the test
// is aborted when the range length is not a multiple of 10.
func testDigitCompare(t *testing.T, c *collate.Collator, zero, nine rune) {
if !unicode.In(zero, assigned) {
return
}
n := int(nine - zero + 1)
if n%10 != 0 {
t.Fatalf("len([%+q, %+q]) = %d; want a multiple of 10", zero, nine, n)
}
// Each entry holds a prefix and the eleven consecutive ASCII numbers that
// start at prefix+"0"; b[j] is therefore the prefix followed by digit j,
// with the last entry carrying over into the next ten.
for _, tt := range []struct {
prefix string
b [11]string
}{
{
prefix: "",
b: [11]string{
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
},
},
{
prefix: "1",
b: [11]string{
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
},
},
{
prefix: "0",
b: [11]string{
"00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
},
},
{
prefix: "00",
b: [11]string{
"000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010",
},
},
{
prefix: "9",
b: [11]string{
"90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "100",
},
},
} {
for k := 0; k <= n; k++ {
// i is the offset from zero of the rune appended to the prefix; it wraps
// every ten steps, so only the first ten runes of the range are used.
i := k % 10
a := tt.prefix + string(zero+rune(i))
for j, b := range tt.b {
// The expected result is the sign of i - j.
want := 0
switch {
case i < j:
want = -1
case i > j:
want = 1
}
got := c.CompareString(a, b)
if got != want {
t.Errorf("Compare(%+q, %+q) = %d; want %d", a, b, got, want)
// Stop at the first mismatch for this range.
return
}
}
}
}
}
// BenchmarkNumericWeighter measures comparing a repeated text that contains a
// number against itself with numeric collation enabled.
func BenchmarkNumericWeighter(b *testing.B) {
	coll := collate.New(language.English, collate.Numeric)
	text := bytes.Repeat([]byte("Testing, testing 123..."), 100)
	// Both operands are counted towards the processed byte total.
	b.SetBytes(int64(2 * len(text)))
	for iter := 0; iter < b.N; iter++ {
		coll.Compare(text, text)
	}
}

View file

@ -1,183 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"fmt"
"testing"
"unicode"
)
// String formats e as "[primary, secondary, tertiary, quaternary]". The
// quaternary weight is printed as "max" when it equals MaxQuaternary.
func (e Elem) String() string {
	quat := "max"
	if v := e.Quaternary(); v != MaxQuaternary {
		quat = fmt.Sprint(v)
	}
	return fmt.Sprintf("[%d, %d, %d, %s]", e.Primary(), e.Secondary(), e.Tertiary(), quat)
}
// ceTest pairs a collation element constructor with the arguments to feed it.
type ceTest struct {
// f builds an element from inout and reports the element type it expects.
// The helpers used in ceTests also write the decoded values back into inout.
f func(inout []int) (Elem, ceType)
// arg holds the values handed to f (as a copy) by TestColElem.
arg []int
}
// makeCE builds a collation element from the first four entries of weights,
// passed to MakeElem in order with the fourth converted to uint8. Any error
// reported by MakeElem is dropped.
func makeCE(weights []int) Elem {
	w0, w1, w2 := weights[0], weights[1], weights[2]
	last := uint8(weights[3])
	elem, _ := MakeElem(w0, w1, w2, last)
	return elem
}
// defaultValues supplies the weights e uses for positions the caller omits.
var defaultValues = []int{0, defaultSecondary, defaultTertiary, 0}

// e returns the element for the given leading weights; positions that are not
// supplied are filled in from defaultValues.
func e(w ...int) Elem {
	missing := defaultValues[len(w):]
	return makeCE(append(w, missing...))
}
// makeContractIndex packs index, n and offset into a single element: n is
// added as-is, index is shifted left by maxNBits, offset by
// maxNBits+maxTrieIndexBits, and contractID is added on top.
func makeContractIndex(index, n, offset int) Elem {
	const (
		contractID            = 0xC0000000
		maxNBits              = 4
		maxTrieIndexBits      = 12
		maxContractOffsetBits = 13
	)
	offsetPart := Elem(offset << (maxNBits + maxTrieIndexBits))
	indexPart := Elem(index << maxNBits)
	return Elem(contractID) + offsetPart + indexPart + Elem(n)
}
// makeExpandIndex returns the element formed by adding index to expandID.
func makeExpandIndex(index int) Elem {
	const expandID = 0xE0000000
	return Elem(index) + expandID
}
// makeDecompose returns the element formed by adding t1 plus t2 shifted left
// by eight bits to decompID.
func makeDecompose(t1, t2 int) Elem {
	const decompID = 0xF0000000
	payload := Elem(t1 + t2<<8)
	return decompID + payload
}
// normalCE builds a normal element from inout and overwrites the first four
// entries of inout with the primary, secondary, tertiary and CCC values read
// back from that element.
func normalCE(inout []int) (ce Elem, t ceType) {
	ce = makeCE(inout)
	decoded := [...]int{
		ce.Primary(),
		ce.Secondary(),
		int(ce.Tertiary()),
		int(ce.CCC()),
	}
	// makeCE has already indexed inout[3], so all four values fit.
	copy(inout, decoded[:])
	return ce, ceNormal
}
// expandCE builds an expansion-index element from inout[0] and replaces
// inout[0] with the index read back via splitExpandIndex.
func expandCE(inout []int) (ce Elem, t ceType) {
	t = ceExpansionIndex
	ce = makeExpandIndex(inout[0])
	inout[0] = splitExpandIndex(ce)
	return ce, t
}
// contractCE builds a contraction-index element from the first three entries
// of inout and replaces them with the values read back via
// splitContractIndex.
func contractCE(inout []int) (ce Elem, t ceType) {
	t = ceContractionIndex
	ce = makeContractIndex(inout[0], inout[1], inout[2])
	inout[0], inout[1], inout[2] = splitContractIndex(ce)
	return ce, t
}
// decompCE builds a decomposition element from the first two entries of inout
// and replaces them with the values read back via splitDecompose.
func decompCE(inout []int) (ce Elem, t ceType) {
	t = ceDecompose
	ce = makeDecompose(inout[0], inout[1])
	first, second := splitDecompose(ce)
	inout[0] = int(first)
	inout[1] = int(second)
	return ce, t
}
// ceTests lists, per element kind, argument sets that TestColElem expects to
// come back unchanged after being encoded into an element and decoded again.
var ceTests = []ceTest{
{normalCE, []int{0, 0, 0, 0}},
{normalCE, []int{0, 30, 3, 0}},
{normalCE, []int{0, 30, 3, 0xFF}},
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0}},
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0xFF}},
{normalCE, []int{100, defaultSecondary, 3, 0}},
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}},
// Contraction indexes: small values plus the largest value of each field.
{contractCE, []int{0, 0, 0}},
{contractCE, []int{1, 1, 1}},
{contractCE, []int{1, (1 << maxNBits) - 1, 1}},
{contractCE, []int{(1 << maxTrieIndexBits) - 1, 1, 1}},
{contractCE, []int{1, 1, (1 << maxContractOffsetBits) - 1}},
// Expansion indexes, up to the largest representable index.
{expandCE, []int{0}},
{expandCE, []int{5}},
{expandCE, []int{(1 << maxExpandIndexBits) - 1}},
// Decomposition elements.
{decompCE, []int{0, 0}},
{decompCE, []int{1, 1}},
{decompCE, []int{0x1F, 0x1F}},
}
// TestColElem runs every entry of ceTests: the constructed element must have
// the expected type, and the values decoded from it must equal the inputs.
func TestColElem(t *testing.T) {
	for idx, tc := range ceTests {
		// Work on a copy so that tc.arg keeps the expected values.
		got := make([]int, len(tc.arg))
		copy(got, tc.arg)

		elem, wantType := tc.f(got)
		if elem.ctype() != wantType {
			t.Errorf("%d: type is %d; want %d (ColElem: %X)", idx, elem.ctype(), wantType, elem)
		}
		for pos := range tc.arg {
			want := tc.arg[pos]
			if got[pos] == want {
				continue
			}
			t.Errorf("%d: argument %d is %X; want %X (ColElem: %X)", idx, pos, got[pos], want, elem)
		}
	}
}
// implicitTest is one case for TestImplicit.
type implicitTest struct {
// r is the rune passed to implicitPrimary.
r rune
// p is the primary weight implicitPrimary is expected to return for r.
p int
}
// implicitTests lists the runes checked by TestImplicit together with their
// expected implicit primary weights.
var implicitTests = []implicitTest{
{0x33FF, 0x533FF},
{0x3400, 0x23400},
{0x4DC0, 0x54DC0},
{0x4DFF, 0x54DFF},
{0x4E00, 0x14E00},
{0x9FCB, 0x19FCB},
{0xA000, 0x5A000},
{0xF8FF, 0x5F8FF},
{0xF900, 0x1F900},
{0xFA23, 0x1FA23},
{0xFAD9, 0x1FAD9},
{0xFB00, 0x5FB00},
{0x20000, 0x40000},
{0x2B81C, 0x4B81C},
{unicode.MaxRune, 0x15FFFF}, // maximum primary value
}
// TestImplicit checks implicitPrimary against every entry of implicitTests.
func TestImplicit(t *testing.T) {
	for i := range implicitTests {
		tc := implicitTests[i]
		got := implicitPrimary(tc.r)
		if got == tc.p {
			continue
		}
		t.Errorf("%U: was %X; want %X", tc.r, got, tc.p)
	}
}
// TestUpdateTertiary checks that updateTertiary turns each input element into
// the expected output element for the given tertiary value.
func TestUpdateTertiary(t *testing.T) {
	type tertiaryCase struct {
		in, out Elem
		t       uint8
	}
	cases := []tertiaryCase{
		{0x4000FE20, 0x0000FE8A, 0x0A},
		{0x4000FE21, 0x0000FEAA, 0x0A},
		{0x0000FE8B, 0x0000FE83, 0x03},
		{0x82FF0188, 0x9BFF0188, 0x1B},
		{0xAFF0CC02, 0xAFF0CC1B, 0x1B},
	}
	for idx := range cases {
		tc := cases[idx]
		got := tc.in.updateTertiary(tc.t)
		if got == tc.out {
			continue
		}
		t.Errorf("%d: was %X; want %X", idx, got, tc.out)
	}
}

View file

@ -1,64 +0,0 @@
package colltab
import (
"testing"
"golang.org/x/text/language"
)
// TestMatchLang checks, for a fixed list of supported tags, which index
// MatchLang selects for a range of requested tags.
func TestMatchLang(t *testing.T) {
	supported := []language.Tag{
		0:  language.Und,
		1:  language.MustParse("bs"),
		2:  language.German,
		3:  language.English,
		4:  language.AmericanEnglish,
		5:  language.MustParse("en-US-u-va-posix"),
		6:  language.Portuguese,
		7:  language.Serbian,
		8:  language.MustParse("sr-Latn"),
		9:  language.Chinese,
		10: language.MustParse("zh-u-co-stroke"),
		11: language.MustParse("zh-Hant-u-co-pinyin"),
		12: language.TraditionalChinese,
	}

	type matchCase struct {
		x int          // expected index into supported
		t language.Tag // requested tag
	}
	cases := []matchCase{
		{0, language.Und},
		{0, language.Persian}, // Default to first element when no match.
		{3, language.English},
		{4, language.AmericanEnglish},
		{5, language.MustParse("en-US-u-va-posix")},   // Ext. variant match.
		{4, language.MustParse("en-US-u-va-noposix")}, // Ext. variant mismatch.
		{3, language.MustParse("en-UK-u-va-noposix")}, // Ext. variant mismatch.
		{7, language.Serbian},
		{0, language.Croatian},             // Don't match to close language!
		{0, language.MustParse("gsw")},     // Don't match to close language!
		{1, language.MustParse("bs-Cyrl")}, // Odd, but correct.
		{1, language.MustParse("bs-Latn")}, // Estimated script drops.
		{8, language.MustParse("sr-Latn")},
		{9, language.Chinese},
		{9, language.SimplifiedChinese},
		{12, language.TraditionalChinese},
		{11, language.MustParse("zh-Hant-u-co-pinyin")},
		// TODO: should this be 12? Either inherited value (10) or default is
		// fine in this case, though. Other locales are not affected.
		{10, language.MustParse("zh-Hant-u-co-stroke")},
		// There is no "phonebk" sorting order for zh-Hant, so use default.
		{12, language.MustParse("zh-Hant-u-co-phonebk")},
		{10, language.MustParse("zh-u-co-stroke")},
		{12, language.MustParse("und-TW")},     // Infer script and language.
		{12, language.MustParse("und-HK")},     // Infer script and language.
		{6, language.MustParse("und-BR")},      // Infer script and language.
		{6, language.MustParse("und-PT")},      // Infer script and language.
		{2, language.MustParse("und-Latn-DE")}, // Infer language.
		{0, language.MustParse("und-Jpan-BR")}, // Infers "ja", so no match.
		{0, language.MustParse("zu")},          // No match past index.
	}

	for i, tc := range cases {
		got := MatchLang(tc.t, supported)
		if got == tc.x {
			continue
		}
		t.Errorf("%d: MatchLang(%q, tags) = %d; want %d", i, tc.t, got, tc.x)
	}
}

View file

@ -1,131 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
// lookupStrings is a single lookup performed by TestLookupContraction together
// with its expected results.
type lookupStrings struct {
// str is the input; the test looks up both str and str followed by "X".
str string
// offset is the first value expected from the lookup.
offset int
n int // bytes consumed from input
}
// LookupTest groups a contraction trie with the lookups to run against it.
type LookupTest struct {
// lookup lists the inputs and their expected results.
lookup []lookupStrings
// n is passed to lookup as its nnode argument.
n int
// tries is the trie set the lookups are run against.
tries ContractTrieSet
}
// lookupTests holds the cases for TestLookupContraction. Each case lists the
// lookups with their expected offset and consumed byte count, the nnode value
// handed to lookup, and the trie set that is searched.
var lookupTests = []LookupTest{{
[]lookupStrings{
{"abc", 1, 3},
{"a", 0, 0},
{"b", 0, 0},
{"c", 0, 0},
{"d", 0, 0},
},
1,
ContractTrieSet{
{'a', 0, 1, 0xFF},
{'b', 0, 1, 0xFF},
{'c', 'c', 0, 1},
},
}, {
[]lookupStrings{
{"abc", 1, 3},
{"abd", 2, 3},
{"abe", 3, 3},
{"a", 0, 0},
{"ab", 0, 0},
{"d", 0, 0},
{"f", 0, 0},
},
1,
ContractTrieSet{
{'a', 0, 1, 0xFF},
{'b', 0, 1, 0xFF},
{'c', 'e', 0, 1},
},
}, {
[]lookupStrings{
{"abc", 1, 3},
{"ab", 2, 2},
{"a", 3, 1},
{"abcd", 1, 3},
{"abe", 2, 2},
},
1,
ContractTrieSet{
{'a', 0, 1, 3},
{'b', 0, 1, 2},
{'c', 'c', 0, 1},
},
}, {
[]lookupStrings{
{"abc", 1, 3},
{"abd", 2, 3},
{"ab", 3, 2},
{"ac", 4, 2},
{"a", 5, 1},
{"b", 6, 1},
{"ba", 6, 1},
},
2,
ContractTrieSet{
{'b', 'b', 0, 6},
{'a', 0, 2, 5},
{'c', 'c', 0, 4},
{'b', 0, 1, 3},
{'c', 'd', 0, 1},
},
}, {
[]lookupStrings{
{"bcde", 2, 4},
{"bc", 7, 2},
{"ab", 6, 2},
{"bcd", 5, 3},
{"abcd", 1, 4},
{"abc", 4, 3},
{"bcdf", 3, 4},
},
2,
ContractTrieSet{
{'b', 3, 1, 0xFF},
{'a', 0, 1, 0xFF},
{'b', 0, 1, 6},
{'c', 0, 1, 4},
{'d', 'd', 0, 1},
{'c', 0, 1, 7},
{'d', 0, 1, 5},
{'e', 'f', 0, 2},
},
}}
// lookup creates a scanner over s on c, runs a single scan from position 0
// and returns the scanner's result.
func lookup(c *ContractTrieSet, nnode int, s []uint8) (i, n int) {
	sc := c.scanner(0, nnode, s)
	sc.scan(0)
	i, n = sc.result()
	return i, n
}
// TestLookupContraction runs every lookup of every case in lookupTests, both
// for the listed string and for that string followed by "X", and compares
// the returned offset and consumed byte count with the expected values.
func TestLookupContraction(t *testing.T) {
	for i, tt := range lookupTests {
		cts := ContractTrieSet(tt.tries)
		for j, lu := range tt.lookup {
			str := lu.str
			// The trailing "X" must not change the outcome of the lookup.
			for _, s := range []string{str, str + "X"} {
				const msg = `%d:%d: %s of "%s" %v; want %v`
				offset, n := lookup(&cts, tt.n, []byte(s))
				if offset != lu.offset {
					t.Errorf(msg, i, j, "offset", s, offset, lu.offset)
				}
				if n != lu.n {
					// Report the value actually compared against (lu.n), not
					// len(str): the two differ for several table entries.
					t.Errorf(msg, i, j, "bytes consumed", s, n, lu.n)
				}
			}
		}
	}
}

View file

@ -1,63 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
// TestDoNorm checks the order of CCC values produced by Iter.doNorm.
//
// Each input lists CCC values with a div marker. Elements before the marker
// get primary weight 0, elements after it get primary weight 100, and doNorm
// is called at the position of the first element after the marker. The
// resulting CCC sequence must equal out, and within a run of equal CCC values
// the primary weights must not decrease.
func TestDoNorm(t *testing.T) {
	const div = -1 // The insertion point of the next block.
	tests := []struct {
		in, out []int
	}{{
		in:  []int{4, div, 3},
		out: []int{3, 4},
	}, {
		in:  []int{4, div, 3, 3, 3},
		out: []int{3, 3, 3, 4},
	}, {
		in:  []int{0, 4, div, 3},
		out: []int{0, 3, 4},
	}, {
		in:  []int{0, 0, 4, 5, div, 3, 3},
		out: []int{0, 0, 3, 3, 4, 5},
	}, {
		in:  []int{0, 0, 1, 4, 5, div, 3, 3},
		out: []int{0, 0, 1, 3, 3, 4, 5},
	}, {
		in:  []int{0, 0, 1, 4, 5, div, 4, 4},
		out: []int{0, 0, 1, 4, 4, 4, 5},
	},
	}
	for j, tt := range tests {
		i := Iter{}
		var w, p int
		for k, cc := range tt.in {
			if cc == div {
				// The marker itself is not appended, so k is the index of
				// the first element of the second block.
				w = 100
				p = k
				continue
			}
			i.Elems = append(i.Elems, makeCE([]int{w, defaultSecondary, 2, cc}))
		}
		i.doNorm(p, i.Elems[p].CCC())
		if len(i.Elems) != len(tt.out) {
			t.Errorf("%d: length was %d; want %d", j, len(i.Elems), len(tt.out))
			// Indexing tt.out below could go out of range.
			continue
		}
		prevCCC := uint8(0)
		for k, ce := range i.Elems {
			if int(ce.CCC()) != tt.out[k] {
				t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.CCC(), tt.out[k])
			}
			if k > 0 && ce.CCC() == prevCCC && i.Elems[k-1].Primary() > ce.Primary() {
				t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k)
			}
			// Remember this CCC so the next element is compared with its
			// actual predecessor rather than with the initial 0.
			prevCCC = ce.CCC()
		}
	}
	// Combining rune overflow is tested in search/pattern_test.go.
}

View file

@ -1,159 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"reflect"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
)
// digSec and digTert are the secondary and tertiary weights that p assigns to
// the elements it creates.
const (
digSec = defaultSecondary
digTert = defaultTertiary
)
// tPlus3 is an element with primary weight 0, secondary weight 50 and a
// tertiary weight of digTert+3.
var tPlus3 = e(0, 50, digTert+3)
// numWeighter is a testWeighter used for testing numericWeighter.
//
// The fullwidth digit keys are written as \u escapes so that they survive
// tools that drop non-BMP-safe or wide characters; an empty key repeated
// several times would not compile.
var numWeighter = testWeighter{
	"0":      p(100),
	"\uff10": []Elem{e(100, digSec, digTert+1)}, // U+FF10 FULLWIDTH DIGIT ZERO
	"₀":      []Elem{e(100, digSec, digTert+5)}, // U+2080 SUBSCRIPT ZERO

	"1": p(101),
	// Allow non-primary collation elements to be inserted.
	"١": append(p(101), tPlus3), // U+0661 ARABIC-INDIC DIGIT ONE
	// Allow varying tertiary weight if the number is Nd.
	"\uff11": []Elem{e(101, digSec, digTert+1)}, // U+FF11 FULLWIDTH DIGIT ONE
	"2":      p(102),
	// Allow non-primary collation elements to be inserted.
	"٢": append(p(102), tPlus3), // U+0662 ARABIC-INDIC DIGIT TWO
	// Varying tertiary weights should be ignored.
	"\uff12": []Elem{e(102, digSec, digTert+3)}, // U+FF12 FULLWIDTH DIGIT TWO
	"3":      p(103),
	"4":      p(104),
	"5":      p(105),
	"6":      p(106),
	"7":      p(107),
	// Weights must be strictly monotonically increasing, but do not need to be
	// consecutive.
	"8": p(118),
	"9": p(119),
	// Allow non-primary collation elements to be inserted.
	"٩": append(p(119), tPlus3), // U+0669 ARABIC-INDIC DIGIT NINE
	// Varying tertiary weights should be ignored.
	"\uff19": []Elem{e(119, digSec, digTert+1)}, // U+FF19 FULLWIDTH DIGIT NINE
	"₉":      []Elem{e(119, digSec, digTert+5)}, // U+2089 SUBSCRIPT NINE

	"a":   p(5),
	"b":   p(6),
	"c":   p(8, 2),
	"klm": p(99),
	"nop": p(121),
	"x":   p(200),
	"y":   p(201),
}
// p returns one element per given primary weight, each created with the
// digSec secondary weight, the digTert tertiary weight and a CCC of 0. Errors
// from MakeElem are dropped. Without arguments the result is nil.
func p(w ...int) (elems []Elem) {
	for i := range w {
		elem, _ := MakeElem(w[i], digSec, digTert, 0)
		elems = append(elems, elem)
	}
	return elems
}
// TestNumericAppendNext feeds each input through a numeric weighter wrapped
// around numWeighter, once via AppendNext and once via AppendNextString, and
// compares the collected elements with the expected sequence.
func TestNumericAppendNext(t *testing.T) {
	for _, tt := range []struct {
		in string
		w  []Elem
	}{
		{"a", p(5)},
		{"klm", p(99)},
		{"aa", p(5, 5)},
		{"1", p(120, 1, 101)},
		{"0", p(120, 0)},
		{"01", p(120, 1, 101)},
		{"0001", p(120, 1, 101)},
		{"10", p(120, 2, 101, 100)},
		{"99", p(120, 2, 119, 119)},
		{"9999", p(120, 4, 119, 119, 119, 119)},
		{"1a", p(120, 1, 101, 5)},
		{"0b", p(120, 0, 6)},
		{"01c", p(120, 1, 101, 8, 2)},
		{"10x", p(120, 2, 101, 100, 200)},
		{"99y", p(120, 2, 119, 119, 201)},
		{"9999nop", p(120, 4, 119, 119, 119, 119, 121)},

		// Allow follow-up collation elements if they have a zero non-primary.
		{"١٢٩", []Elem{e(120), e(3), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}},
		{
			// Fullwidth digits one, two and nine (U+FF11, U+FF12, U+FF19);
			// the expected elements carry the tertiary weights that
			// numWeighter assigns to those keys.
			"\uff11\uff12\uff19",
			[]Elem{
				e(120), e(3),
				e(101, digSec, digTert+1),
				e(102, digSec, digTert+3),
				e(119, digSec, digTert+1),
			},
		},

		// Ensure AppendNext* adds to the given buffer.
		{"a10", p(5, 120, 2, 101, 100)},
	} {
		nw := NewNumericWeighter(numWeighter)

		// Byte-slice variant: keep consuming until the input is exhausted.
		b := []byte(tt.in)
		got := []Elem(nil)
		for n, sz := 0, 0; n < len(b); {
			got, sz = nw.AppendNext(got, b[n:])
			n += sz
		}
		if !reflect.DeepEqual(got, tt.w) {
			t.Errorf("AppendNext(%q) =\n%v; want\n%v", tt.in, got, tt.w)
		}

		// String variant, starting again from an empty buffer.
		got = nil
		for n, sz := 0, 0; n < len(tt.in); {
			got, sz = nw.AppendNextString(got, tt.in[n:])
			n += sz
		}
		if !reflect.DeepEqual(got, tt.w) {
			t.Errorf("AppendNextString(%q) =\n%v; want\n%v", tt.in, got, tt.w)
		}
	}
}
// TestNumericOverflow feeds a run of maxDigits+1 nines followed by "a" to the
// numeric weighter and expects it to consume exactly maxDigits bytes and to
// report maxDigits as the primary weight of the second element.
func TestNumericOverflow(t *testing.T) {
	manyDigits := strings.Repeat("9", maxDigits+1) + "a"

	nw := NewNumericWeighter(numWeighter)

	got, n := nw.AppendNextString(nil, manyDigits)

	if n != maxDigits {
		t.Errorf("n: got %d; want %d", n, maxDigits)
	}

	// Guard the index below so a short result fails the test, not panics.
	if len(got) < 2 {
		t.Fatalf("len(elems): got %d; want at least 2", len(got))
	}
	// Report the primary weight that was actually compared, not n.
	if primary := got[1].Primary(); primary != maxDigits {
		t.Errorf("primary(e[1]): got %d; want %d", primary, maxDigits)
	}
}
// TestNumericWeighterAlloc compares the allocations per run of the numeric
// weighter with those of the plain numWeighter on the same input; the numeric
// weighter must not allocate more.
func TestNumericWeighterAlloc(t *testing.T) {
	input := "1234567890a"
	scratch := make([]Elem, 100)
	numeric := NewNumericWeighter(numWeighter)

	baseline := testtext.AllocsPerRun(3, func() { numWeighter.AppendNextString(scratch, input) })
	withNumeric := testtext.AllocsPerRun(3, func() { numeric.AppendNextString(scratch, input) })

	extra := withNumeric - baseline
	if extra > 0 {
		t.Errorf("got %f; want 0", extra)
	}
}

View file

@ -1,106 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths.
// TestLookupTrie expects the value found for each rune to equal the rune's
// index in this slice.
var testRunes = []rune{
0x01, 0x0C, 0x7F, // 1-byte sequences
0x80, 0x100, 0x7FF, // 2-byte sequences
0x800, 0x999, 0xFFFF, // 3-byte sequences
0x10000, 0x10101, 0x10FFFF, // 4-byte sequences
0x200, 0x201, 0x202, 0x210, 0x215, // five entries in one sparse block
}
// Test cases for illegal runes.
type trietest struct {
// size is the size the trie lookup is expected to report for bytes.
size int
// bytes is the input handed to the trie lookup.
bytes []byte
}
// tests lists byte sequences for which TestLookupTrie expects a looked-up
// value of 0, together with the size the lookup should report.
var tests = []trietest{
// illegal runes
{1, []byte{0x80}},
{1, []byte{0xFF}},
{1, []byte{t2, tx - 1}},
{1, []byte{t2, t2}},
{2, []byte{t3, tx, tx - 1}},
{2, []byte{t3, tx, t2}},
{1, []byte{t3, tx - 1, tx}},
{3, []byte{t4, tx, tx, tx - 1}},
{3, []byte{t4, tx, tx, t2}},
{1, []byte{t4, t2, tx, tx - 1}},
{2, []byte{t4, tx, t2, tx - 1}},
// short runes
{0, []byte{t2}},
{0, []byte{t3, tx}},
{0, []byte{t4, tx, tx}},
// we only support UTF-8 up to utf8.UTFMax bytes (4 bytes)
{1, []byte{t5, tx, tx, tx, tx}},
{1, []byte{t6, tx, tx, tx, tx, tx}},
}
// TestLookupTrie looks up every rune of testRunes in testTrie, expecting the
// rune's index as value and its encoded length as size. It then looks up the
// byte sequences in tests, expecting a value of 0 and the listed size.
func TestLookupTrie(t *testing.T) {
	for want, r := range testRunes {
		enc := []byte(string(r))
		got, size := testTrie.lookup(enc)
		if int(got) != want {
			t.Errorf("lookup(%U): found value %#x, expected %#x", r, got, want)
		}
		if size != len(enc) {
			t.Errorf("lookup(%U): found size %d, expected %d", r, size, len(enc))
		}
	}

	for idx := range tests {
		tc := tests[idx]
		got, size := testTrie.lookup(tc.bytes)
		if int(got) != 0 {
			t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", idx, got)
		}
		if size != tc.size {
			t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", idx, size, tc.size)
		}
	}
}
// test data is taken from exp/collate/locale/build/trie_test.go
// testValues is the value table handed to testTrie. Only the listed indices
// are set; every other entry of the array is zero.
var testValues = [832]uint32{
0x000c: 0x00000001,
0x007f: 0x00000002,
0x00c0: 0x00000003,
0x0100: 0x00000004,
0x0140: 0x0000000c, 0x0141: 0x0000000d, 0x0142: 0x0000000e,
0x0150: 0x0000000f,
0x0155: 0x00000010,
0x01bf: 0x00000005,
0x01c0: 0x00000006,
0x0219: 0x00000007,
0x027f: 0x00000008,
0x0280: 0x00000009,
0x02c1: 0x0000000a,
0x033f: 0x0000000b,
}
// testLookup is the index table handed to testTrie. Only the listed indices
// are set; every other entry of the array is zero.
var testLookup = [640]uint16{
0x0e0: 0x05, 0x0e6: 0x06,
0x13f: 0x07,
0x140: 0x08, 0x144: 0x09,
0x190: 0x03,
0x1ff: 0x0a,
0x20f: 0x05,
0x242: 0x01, 0x244: 0x02,
0x248: 0x03,
0x25f: 0x04,
0x260: 0x01,
0x26f: 0x02,
0x270: 0x04, 0x274: 0x06,
}
// testTrie is the trie exercised by TestLookupTrie. It is assembled from
// testLookup (once offset by six blocks, once in full) and testValues.
// NOTE(review): the meaning of the four positional Trie fields is not visible
// here - confirm against the Trie definition.
var testTrie = Trie{testLookup[6*blockSize:], testValues[:], testLookup[:], testValues[:]}

View file

@ -1,42 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
// testWeighter is a simple Weighter that returns weights from a user-defined map.
// Keys are input strings; values are the elements appended for that input.
type testWeighter map[string][]Elem
// Start, StartString, Domain and Top are stubs that return zero values.
func (t testWeighter) Start(int, []byte) int { return 0 }
func (t testWeighter) StartString(int, string) int { return 0 }
func (t testWeighter) Domain() []string { return nil }
func (t testWeighter) Top() uint32 { return 0 }
// maxContractBytes is the maximum length of any key in the map.
const maxContractBytes = 10

// AppendNext appends to buf the elements stored for the longest prefix of s
// that is a key of t, considering at most maxContractBytes bytes, and returns
// the extended buffer together with the length of that prefix. It panics when
// no prefix of s is a key.
func (t testWeighter) AppendNext(buf []Elem, s []byte) ([]Elem, int) {
	limit := maxContractBytes
	if len(s) < limit {
		limit = len(s)
	}
	// Try the longest candidate first.
	for end := limit; end > 0; end-- {
		elems, found := t[string(s[:end])]
		if !found {
			continue
		}
		return append(buf, elems...), end
	}
	panic("incomplete testWeighter: could not find " + string(s))
}
// AppendNextString appends to buf the elements stored for the longest prefix
// of s that is a key of t, considering at most maxContractBytes bytes, and
// returns the extended buffer together with the length of that prefix. It
// panics when no prefix of s is a key.
func (t testWeighter) AppendNextString(buf []Elem, s string) ([]Elem, int) {
	limit := maxContractBytes
	if len(s) < limit {
		limit = len(s)
	}
	// Try the longest candidate first.
	for end := limit; end > 0; end-- {
		elems, found := t[s[:end]]
		if !found {
			continue
		}
		return append(buf, elems...), end
	}
	panic("incomplete testWeighter: could not find " + s)
}

View file

@ -55,18 +55,36 @@ func (w *CodeWriter) WriteGoFile(filename, pkg string) {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer f.Close()
if _, err = w.WriteGo(f, pkg); err != nil {
if _, err = w.WriteGo(f, pkg, ""); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// WriteVersionedGoFile appends the buffer with the total size of all created
// structures and writes it as a Go file to the given file with the given
// package name and build tags for the current Unicode version.
func (w *CodeWriter) WriteVersionedGoFile(filename, pkg string) {
	constraint := buildTags()
	if constraint != "" {
		// Versioned files carry the Unicode version in their name.
		filename = insertVersion(filename, UnicodeVersion())
	}

	out, err := os.Create(filename)
	if err != nil {
		log.Fatalf("Could not create file %s: %v", filename, err)
	}
	defer out.Close()

	_, err = w.WriteGo(out, pkg, constraint)
	if err != nil {
		log.Fatalf("Error writing file %s: %v", filename, err)
	}
}
// WriteGo appends the buffer with the total size of all created structures and
// writes it as a Go file to the given writer with the given package name.
func (w *CodeWriter) WriteGo(out io.Writer, pkg string) (n int, err error) {
func (w *CodeWriter) WriteGo(out io.Writer, pkg, tags string) (n int, err error) {
sz := w.Size
w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32())
defer w.buf.Reset()
return WriteGo(out, pkg, w.buf.Bytes())
return WriteGo(out, pkg, tags, w.buf.Bytes())
}
func (w *CodeWriter) printf(f string, x ...interface{}) {

View file

@ -31,6 +31,7 @@ import (
"os"
"path"
"path/filepath"
"strings"
"sync"
"unicode"
@ -69,8 +70,6 @@ func Init() {
const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package %s
`
// UnicodeVersion reports the requested Unicode version.
@ -78,11 +77,33 @@ func UnicodeVersion() string {
return *unicodeVersion
}
// UnicodeVersion reports the requested CLDR version.
// CLDRVersion reports the requested CLDR version.
func CLDRVersion() string {
return *cldrVersion
}
// tags maps Unicode versions to the build tags of the files generated for
// them. buildTags walks the entries in order and returns the first match; the
// last entry, with an empty version, serves as the fallback.
var tags = []struct{ version, buildTags string }{
{"10.0.0", "go1.10"},
{"", "!go1.10"},
}
// buildTags reports the build tags used for the current Unicode version.
//
// It returns the build tags of the first entry in tags whose version is not
// later than UnicodeVersion(). If no entry matches, the tags of the first
// entry are returned.
func buildTags() string {
	v := UnicodeVersion()
	for _, x := range tags {
		// We should do a numeric comparison, but including the collate package
		// would create an import cycle. We approximate it by assuming that
		// longer version strings are later.
		//
		// The length test must be strict: with "<=" the equal-length case
		// would return here and the lexical comparison below could never run.
		if len(x.version) < len(v) {
			return x.buildTags
		}
		// Versions of equal length are compared lexically.
		if len(x.version) == len(v) && x.version <= v {
			return x.buildTags
		}
	}
	return tags[0].buildTags
}
// IsLocal reports whether data files are available locally.
func IsLocal() bool {
dir, err := localReadmeFile()
@ -243,15 +264,46 @@ func WriteGoFile(filename, pkg string, b []byte) {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer w.Close()
if _, err = WriteGo(w, pkg, b); err != nil {
if _, err = WriteGo(w, pkg, "", b); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// insertVersion returns filename with version inserted in front of its
// suffix. The suffix is "_test.go" when filename ends in it and ".go"
// otherwise; that many trailing bytes of filename are replaced by version
// followed by the suffix.
func insertVersion(filename, version string) string {
	ext := ".go"
	if strings.HasSuffix(filename, "_test.go") {
		ext = "_test.go"
	}
	stem := filename[:len(filename)-len(ext)]
	return fmt.Sprintf("%s%s%s", stem, version, ext)
}
// WriteVersionedGoFile prepends a standard file comment, adds build tags to
// version the file for the current Unicode version, and package statement to
// the given bytes, applies gofmt, and writes them to a file with the given
// name. It will call log.Fatal if there are any errors.
func WriteVersionedGoFile(filename, pkg string, b []byte) {
	constraint := buildTags()
	if constraint != "" {
		// Versioned files carry the Unicode version in their name.
		filename = insertVersion(filename, UnicodeVersion())
	}

	out, err := os.Create(filename)
	if err != nil {
		log.Fatalf("Could not create file %s: %v", filename, err)
	}
	defer out.Close()

	_, err = WriteGo(out, pkg, constraint, b)
	if err != nil {
		log.Fatalf("Error writing file %s: %v", filename, err)
	}
}
// WriteGo prepends a standard file comment and package statement to the given
// bytes, applies gofmt, and writes them to w.
func WriteGo(w io.Writer, pkg string, b []byte) (n int, err error) {
src := []byte(fmt.Sprintf(header, pkg))
func WriteGo(w io.Writer, pkg, tags string, b []byte) (n int, err error) {
src := []byte(header)
if tags != "" {
src = append(src, fmt.Sprintf("// +build %s\n\n", tags)...)
}
src = append(src, fmt.Sprintf("package %s\n\n", pkg)...)
src = append(src, b...)
formatted, err := format.Source(src)
if err != nil {

View file

@ -1,38 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package internal
import (
"testing"
"golang.org/x/text/language"
)
// TestParents checks, for a handful of tags, that the Parent table maps the
// compact index of the tag to the compact index of the expected parent tag.
func TestParents(t *testing.T) {
	type parentCase struct {
		tag, parent string
	}
	cases := []parentCase{
		{"af", "und"},
		{"en", "und"},
		{"en-001", "en"},
		{"en-AU", "en-001"},
		{"en-US", "en"},
		{"en-US-u-va-posix", "en-US"},
		{"ca-ES-valencia", "ca-ES"},
	}
	for _, tc := range cases {
		child, found := language.CompactIndex(language.MustParse(tc.tag))
		if !found {
			t.Fatalf("Could not get index of flag %s", tc.tag)
		}
		parent, found := language.CompactIndex(language.MustParse(tc.parent))
		if !found {
			t.Fatalf("Could not get index of parent %s of tag %s", tc.parent, tc.tag)
		}
		got := int(Parent[child])
		if got == parent {
			continue
		}
		t.Errorf("Parent[%s] = %d; want %d (%s)", tc.tag, got, parent, tc.parent)
	}
}

View file

@ -1,38 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package internal
import (
"fmt"
"strings"
"testing"
"golang.org/x/text/language"
)
// TestUnique parses each space-separated list of tags, passes it to
// UniqueTags and compares the printed result with the expected text.
func TestUnique(t *testing.T) {
	type uniqueCase struct {
		in, want string
	}
	cases := []uniqueCase{
		{"", "[]"},
		{"en", "[en]"},
		{"en en", "[en]"},
		{"en en en", "[en]"},
		{"en-u-cu-eur en", "[en en-u-cu-eur]"},
		{"nl en", "[en nl]"},
		{"pt-Pt pt", "[pt pt-PT]"},
	}
	for _, tc := range cases {
		parsed := []language.Tag{}
		for _, field := range strings.Split(tc.in, " ") {
			// Splitting an empty input yields one empty field; skip it.
			if field == "" {
				continue
			}
			parsed = append(parsed, language.MustParse(field))
		}
		got := fmt.Sprint(UniqueTags(parsed))
		if got == tc.want {
			continue
		}
		t.Errorf("Unique(%s) = %s; want %s", tc.in, got, tc.want)
	}
}

View file

@ -1,56 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package internal
import (
"strings"
"testing"
"golang.org/x/text/language"
)
// TestInheritanceMatcher builds an inheritance matcher from the supported
// tags of each case, matches the wanted tags against it, and checks the
// returned tag, the returned index (unless the confidence is language.No)
// and the confidence.
func TestInheritanceMatcher(t *testing.T) {
	type matchCase struct {
		haveTags string
		wantTags string
		match    string
		conf     language.Confidence
	}
	cases := []matchCase{
		{"und,en,en-US", "en-US", "en-US", language.Exact}, // most specific match
		{"zh-Hant,zh", "zh-TW", "zh-Hant", language.High},  // zh-TW implies Hant.
		{"und,zh", "zh-TW", "und", language.High},          // zh-TW does not match zh.
		{"zh", "zh-TW", "und", language.No},                // zh-TW does not match zh.
		{"iw,en,nl", "he", "he", language.Exact},           // matches after canonicalization
		{"he,en,nl", "iw", "he", language.Exact},           // matches after canonicalization
		// Prefer first match over more specific match for various reasons:
		// a) consistency of user interface is more important than an exact match,
		// b) _if_ und is specified, it should be considered a correct and useful match,
		// Note that a call to this Match will almost always be with a single tag.
		{"und,en,en-US", "he,en-US", "und", language.High},
	}
	for i, tc := range cases {
		supported := parseTags(tc.haveTags)
		matcher := NewInheritanceMatcher(supported)
		gotTag, gotIndex, gotConf := matcher.Match(parseTags(tc.wantTags)...)
		wantTag := language.Raw.Make(tc.match)

		if gotTag != wantTag {
			t.Errorf("%d:tag: got %q; want %q", i, gotTag, wantTag)
		}
		// The index is only checked when there was a match at all.
		if gotConf != language.No {
			canon, _ := language.All.Canonicalize(supported[gotIndex])
			if canon != wantTag {
				t.Errorf("%d:index: got %q; want %q ", i, canon, wantTag)
			}
		}
		if gotConf != tc.conf {
			t.Errorf("%d:conf: got %v; want %v", i, gotConf, tc.conf)
		}
	}
}
// parseTags splits list at commas, trims surrounding white space from each
// part and converts it to a tag with language.Raw.Make.
func parseTags(list string) (out []language.Tag) {
	parts := strings.Split(list, ",")
	for i := range parts {
		name := strings.TrimSpace(parts[i])
		out = append(out, language.Raw.Make(name))
	}
	return out
}

View file

@ -4,9 +4,9 @@ package internal
// Parent maps a compact index of a tag to the compact index of the parent of
// this tag.
var Parent = []uint16{ // 754 elements
var Parent = []uint16{ // 768 elements
// Entry 0 - 3F
0x0000, 0x0053, 0x00e5, 0x0000, 0x0003, 0x0003, 0x0000, 0x0006,
0x0000, 0x0053, 0x00e8, 0x0000, 0x0003, 0x0003, 0x0000, 0x0006,
0x0000, 0x0008, 0x0000, 0x000a, 0x0000, 0x000c, 0x000c, 0x000c,
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
@ -18,100 +18,101 @@ var Parent = []uint16{ // 754 elements
0x0000, 0x0040, 0x0000, 0x0042, 0x0042, 0x0000, 0x0045, 0x0045,
0x0000, 0x0048, 0x0000, 0x004a, 0x0000, 0x0000, 0x004d, 0x004c,
0x004c, 0x0000, 0x0051, 0x0051, 0x0051, 0x0051, 0x0000, 0x0056,
0x0000, 0x0058, 0x0000, 0x005a, 0x0000, 0x005c, 0x005c, 0x0000,
0x005f, 0x0000, 0x0061, 0x0000, 0x0063, 0x0000, 0x0065, 0x0065,
0x0000, 0x0068, 0x0000, 0x006a, 0x006a, 0x006a, 0x006a, 0x006a,
0x006a, 0x006a, 0x0000, 0x0072, 0x0000, 0x0074, 0x0000, 0x0076,
0x0000, 0x0000, 0x0079, 0x0000, 0x007b, 0x0000, 0x007d, 0x0000,
0x0056, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x0000,
0x005f, 0x005f, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
0x0000, 0x0068, 0x0068, 0x0000, 0x006b, 0x0000, 0x006d, 0x006d,
0x006d, 0x006d, 0x006d, 0x006d, 0x006d, 0x0000, 0x0075, 0x0000,
0x0077, 0x0000, 0x0079, 0x0000, 0x0000, 0x007c, 0x0000, 0x007e,
// Entry 80 - BF
0x007f, 0x007f, 0x0000, 0x0082, 0x0082, 0x0000, 0x0085, 0x0086,
0x0086, 0x0086, 0x0085, 0x0087, 0x0086, 0x0086, 0x0086, 0x0085,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086,
0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0087, 0x0086, 0x0086,
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0086, 0x0085, 0x0086,
0x0000, 0x0080, 0x0000, 0x0082, 0x0082, 0x0000, 0x0085, 0x0085,
0x0000, 0x0088, 0x0089, 0x0089, 0x0089, 0x0088, 0x008a, 0x0089,
0x0089, 0x0089, 0x0088, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x008a, 0x0089, 0x0089, 0x0089, 0x0089, 0x008a, 0x0089,
0x008a, 0x0089, 0x0089, 0x008a, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0088, 0x0089, 0x0089,
0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0088,
// Entry C0 - FF
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0085,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0086,
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0085, 0x0086, 0x0086,
0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0000, 0x00ee,
0x0000, 0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1,
0x00f1, 0x00f1, 0x00f1, 0x00f0, 0x00f1, 0x00f0, 0x00f0, 0x00f1,
0x0089, 0x0088, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0089, 0x008a, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0089, 0x0088, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089,
0x008a, 0x0089, 0x0089, 0x008a, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0088,
0x0088, 0x0089, 0x0089, 0x0088, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0000, 0x00f1, 0x0000, 0x00f3, 0x00f4, 0x00f4, 0x00f4,
0x00f4, 0x00f4, 0x00f4, 0x00f4, 0x00f4, 0x00f4, 0x00f3, 0x00f4,
// Entry 100 - 13F
0x00f1, 0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f0, 0x00f1,
0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x0000, 0x010d, 0x0000,
0x010f, 0x0000, 0x0111, 0x0000, 0x0113, 0x0113, 0x0000, 0x0116,
0x0116, 0x0116, 0x0116, 0x0000, 0x011b, 0x0000, 0x011d, 0x0000,
0x011f, 0x011f, 0x0000, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x00f3, 0x00f3, 0x00f4, 0x00f4, 0x00f3, 0x00f4, 0x00f4, 0x00f4,
0x00f4, 0x00f3, 0x00f4, 0x00f4, 0x00f4, 0x00f4, 0x00f4, 0x00f4,
0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0000, 0x0116,
0x0116, 0x0000, 0x0119, 0x0119, 0x0119, 0x0119, 0x0000, 0x011e,
0x0000, 0x0120, 0x0000, 0x0122, 0x0122, 0x0000, 0x0125, 0x0125,
0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125,
0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125,
0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125,
// Entry 140 - 17F
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0000, 0x0151, 0x0000, 0x0153, 0x0000, 0x0155, 0x0000,
0x0157, 0x0000, 0x0159, 0x0000, 0x015b, 0x015b, 0x015b, 0x0000,
0x015f, 0x0000, 0x0000, 0x0162, 0x0000, 0x0164, 0x0000, 0x0166,
0x0166, 0x0166, 0x0000, 0x016a, 0x0000, 0x016c, 0x0000, 0x016e,
0x0000, 0x0170, 0x0170, 0x0000, 0x0173, 0x0000, 0x0175, 0x0000,
0x0177, 0x0000, 0x0179, 0x0000, 0x017b, 0x0000, 0x017d, 0x0000,
0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125,
0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125,
0x0125, 0x0125, 0x0125, 0x0125, 0x0000, 0x0154, 0x0000, 0x0156,
0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x0000, 0x015e,
0x015e, 0x015e, 0x0000, 0x0162, 0x0000, 0x0000, 0x0165, 0x0000,
0x0167, 0x0000, 0x0169, 0x0169, 0x0169, 0x0000, 0x016d, 0x0000,
0x016f, 0x0000, 0x0171, 0x0000, 0x0173, 0x0173, 0x0000, 0x0176,
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
// Entry 180 - 1BF
0x017f, 0x0000, 0x0181, 0x0181, 0x0181, 0x0181, 0x0000, 0x0000,
0x0187, 0x0000, 0x0000, 0x018a, 0x0000, 0x018c, 0x0000, 0x0000,
0x018f, 0x0000, 0x0191, 0x0000, 0x0000, 0x0194, 0x0000, 0x0000,
0x0197, 0x0000, 0x0199, 0x0000, 0x019b, 0x0000, 0x019d, 0x0000,
0x0000, 0x0180, 0x0000, 0x0000, 0x0183, 0x0000, 0x0185, 0x0185,
0x0185, 0x0185, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
0x0000, 0x0190, 0x0000, 0x0000, 0x0193, 0x0000, 0x0195, 0x0000,
0x0000, 0x0198, 0x0000, 0x0000, 0x019b, 0x0000, 0x019d, 0x0000,
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x01ab, 0x0000, 0x01ae,
0x0000, 0x01b0, 0x0000, 0x01b2, 0x0000, 0x01b4, 0x0000, 0x01b6,
0x0000, 0x0000, 0x01b9, 0x0000, 0x01bb, 0x0000, 0x01bd, 0x0000,
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
0x01af, 0x01af, 0x0000, 0x01b2, 0x0000, 0x01b4, 0x0000, 0x01b6,
0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x0000, 0x01bd, 0x0000,
// Entry 1C0 - 1FF
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x01c5,
0x01c5, 0x01c5, 0x0000, 0x01ca, 0x0000, 0x01cc, 0x01cc, 0x0000,
0x01cf, 0x0000, 0x01d1, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000,
0x01d7, 0x0000, 0x01d9, 0x01d9, 0x0000, 0x01dc, 0x0000, 0x01de,
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
0x01c7, 0x0000, 0x01c9, 0x01c9, 0x01c9, 0x01c9, 0x0000, 0x01ce,
0x0000, 0x01d0, 0x01d0, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000,
0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x01dd,
0x0000, 0x01e0, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
0x01ee, 0x01ee, 0x0000, 0x01f2, 0x0000, 0x01f4, 0x0000, 0x01f6,
0x0000, 0x01f8, 0x0000, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x01fd,
0x0000, 0x01f0, 0x0000, 0x01f2, 0x01f2, 0x01f2, 0x0000, 0x01f6,
0x0000, 0x01f8, 0x0000, 0x01fa, 0x0000, 0x01fc, 0x0000, 0x0000,
// Entry 200 - 23F
0x0000, 0x0200, 0x0000, 0x0202, 0x0202, 0x0000, 0x0205, 0x0205,
0x0000, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208,
0x0000, 0x0210, 0x0000, 0x0212, 0x0000, 0x0214, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x021a, 0x0000, 0x0000, 0x021d, 0x0000,
0x021f, 0x021f, 0x0000, 0x0222, 0x0000, 0x0224, 0x0224, 0x0000,
0x0000, 0x0228, 0x0227, 0x0227, 0x0000, 0x0000, 0x022d, 0x0000,
0x022f, 0x0000, 0x0231, 0x0000, 0x023d, 0x0233, 0x023d, 0x023d,
0x023d, 0x023d, 0x023d, 0x023d, 0x023d, 0x0233, 0x023d, 0x023d,
0x01ff, 0x0000, 0x0201, 0x0201, 0x0000, 0x0204, 0x0000, 0x0206,
0x0206, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x020c,
0x020c, 0x020c, 0x020c, 0x020c, 0x0000, 0x0214, 0x0000, 0x0216,
0x0000, 0x0218, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x021e,
0x0000, 0x0000, 0x0221, 0x0000, 0x0223, 0x0223, 0x0000, 0x0226,
0x0000, 0x0228, 0x0228, 0x0000, 0x0000, 0x022c, 0x022b, 0x022b,
0x0000, 0x0000, 0x0231, 0x0000, 0x0233, 0x0000, 0x0235, 0x0000,
0x0241, 0x0237, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241,
// Entry 240 - 27F
0x0000, 0x0240, 0x0240, 0x0240, 0x0000, 0x0244, 0x0000, 0x0246,
0x0000, 0x0248, 0x0248, 0x0000, 0x024b, 0x0000, 0x024d, 0x024d,
0x024d, 0x024d, 0x024d, 0x024d, 0x0000, 0x0254, 0x0000, 0x0256,
0x0000, 0x0258, 0x0000, 0x025a, 0x0000, 0x025c, 0x0000, 0x0000,
0x025f, 0x025f, 0x025f, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
0x0267, 0x0000, 0x0000, 0x026a, 0x0269, 0x0269, 0x0000, 0x026e,
0x0000, 0x0270, 0x0000, 0x0272, 0x0000, 0x0000, 0x0000, 0x0000,
0x0277, 0x0000, 0x0000, 0x027a, 0x0000, 0x027c, 0x027c, 0x027c,
0x0241, 0x0237, 0x0241, 0x0241, 0x0000, 0x0244, 0x0244, 0x0244,
0x0000, 0x0248, 0x0000, 0x024a, 0x0000, 0x024c, 0x024c, 0x0000,
0x024f, 0x0000, 0x0251, 0x0251, 0x0251, 0x0251, 0x0251, 0x0251,
0x0000, 0x0258, 0x0000, 0x025a, 0x0000, 0x025c, 0x0000, 0x025e,
0x0000, 0x0260, 0x0000, 0x0262, 0x0000, 0x0000, 0x0265, 0x0265,
0x0265, 0x0000, 0x0269, 0x0000, 0x026b, 0x0000, 0x026d, 0x0000,
0x0000, 0x0270, 0x026f, 0x026f, 0x0000, 0x0274, 0x0000, 0x0276,
0x0000, 0x0278, 0x0000, 0x0000, 0x0000, 0x0000, 0x027d, 0x0000,
// Entry 280 - 2BF
0x027c, 0x0000, 0x0281, 0x0281, 0x0281, 0x0000, 0x0285, 0x0285,
0x0285, 0x0285, 0x0285, 0x0000, 0x028b, 0x028b, 0x028b, 0x028b,
0x0000, 0x0000, 0x0000, 0x0000, 0x0293, 0x0293, 0x0293, 0x0000,
0x0297, 0x0297, 0x0297, 0x0297, 0x0000, 0x0000, 0x029d, 0x029d,
0x029d, 0x029d, 0x0000, 0x02a2, 0x0000, 0x02a4, 0x02a4, 0x0000,
0x02a7, 0x0000, 0x02a9, 0x02a9, 0x0000, 0x0000, 0x02ad, 0x0000,
0x0000, 0x02b0, 0x0000, 0x02b2, 0x02b2, 0x0000, 0x0000, 0x02b6,
0x0000, 0x02b8, 0x0000, 0x02ba, 0x0000, 0x02bc, 0x0000, 0x02be,
0x0000, 0x0280, 0x0000, 0x0282, 0x0282, 0x0282, 0x0282, 0x0000,
0x0287, 0x0287, 0x0287, 0x0000, 0x028b, 0x028b, 0x028b, 0x028b,
0x028b, 0x0000, 0x0291, 0x0291, 0x0291, 0x0291, 0x0000, 0x0000,
0x0000, 0x0000, 0x0299, 0x0299, 0x0299, 0x0000, 0x029d, 0x029d,
0x029d, 0x029d, 0x0000, 0x0000, 0x02a3, 0x02a3, 0x02a3, 0x02a3,
0x0000, 0x02a8, 0x0000, 0x02aa, 0x02aa, 0x0000, 0x02ad, 0x0000,
0x02af, 0x0000, 0x02b1, 0x02b1, 0x0000, 0x0000, 0x02b5, 0x0000,
0x0000, 0x02b8, 0x0000, 0x02ba, 0x02ba, 0x0000, 0x0000, 0x02be,
// Entry 2C0 - 2FF
0x02be, 0x0000, 0x0000, 0x02c2, 0x0000, 0x02c4, 0x02c1, 0x02c1,
0x0000, 0x0000, 0x02c9, 0x02c8, 0x02c8, 0x0000, 0x0000, 0x02ce,
0x0000, 0x02d0, 0x0000, 0x02d2, 0x0000, 0x0000, 0x02d5, 0x0000,
0x0000, 0x0000, 0x02d9, 0x0000, 0x02db, 0x0000, 0x02dd, 0x0000,
0x02df, 0x02df, 0x0000, 0x02e2, 0x0000, 0x02e4, 0x0000, 0x02e6,
0x02e6, 0x02e6, 0x02e6, 0x02e6, 0x0000, 0x02ec, 0x02ed, 0x02ec,
0x0000, 0x02f0,
} // Size: 1532 bytes
0x0000, 0x02c0, 0x0000, 0x02c2, 0x0000, 0x02c4, 0x0000, 0x02c6,
0x0000, 0x02c8, 0x02c8, 0x0000, 0x0000, 0x02cc, 0x0000, 0x02ce,
0x02cb, 0x02cb, 0x0000, 0x0000, 0x02d3, 0x02d2, 0x02d2, 0x0000,
0x0000, 0x02d8, 0x0000, 0x02da, 0x0000, 0x02dc, 0x0000, 0x0000,
0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
0x0000, 0x02e8, 0x0000, 0x02ea, 0x02ea, 0x0000, 0x0000, 0x02ee,
0x02ed, 0x02ed, 0x0000, 0x02f2, 0x0000, 0x02f4, 0x02f4, 0x02f4,
0x02f4, 0x02f4, 0x0000, 0x02fa, 0x02fb, 0x02fa, 0x0000, 0x02fe,
} // Size: 1560 bytes
// Total table size 1532 bytes (1KiB); checksum: 90718A2
// Total table size 1560 bytes (1KiB); checksum: 4897681C

View file

@ -1,67 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tag
import (
"strings"
"testing"
)
// strdata lists the entries that TestIndex joins, without separators, into a
// single Index string. Because the expected results in testCases are entry
// positions, every entry must have the same width: four bytes, with shorter
// tags padded on the right with spaces.
//
// NOTE(review): the four-byte width is inferred from the other entries in this
// list and from testCases; confirm against the Index implementation.
var strdata = []string{
	"aa  ",
	"aaa ",
	"aaaa",
	"aaab",
	"aab ",
	"ab  ",
	"ba  ",
	"xxxx",
	"\xff\xff\xff\xff",
}
// testCases maps a lookup key to the result TestIndex expects from
// Index.Index for that key: a position in strdata, or -1.
//
// NOTE(review): the non-negative values look like the position of the first
// strdata entry that has the key as a prefix; confirm against Index.Index.
var testCases = map[string]int{
"a": 0,
"aa": 0,
"aaa": 1,
"aa ": 0,
"aaaa": 2,
"aaab": 3,
"b": 6,
"ba": 6,
" ": -1,
"aaax": -1,
"bbbb": -1,
"zzzz": -1,
}
// TestIndex builds an Index from the concatenated strdata entries and checks
// that looking up every key in testCases yields the expected position.
func TestIndex(t *testing.T) {
	table := Index(strings.Join(strdata, ""))
	for key, want := range testCases {
		got := table.Index([]byte(key))
		if got == want {
			continue
		}
		t.Errorf("%s: got %d; want %d", key, got, want)
	}
}
// TestFixCase runs FixCase over a flat list of (form, input, want) triples.
// An empty want means FixCase is expected to report failure for that input.
func TestFixCase(t *testing.T) {
	tests := []string{
		"aaaa", "AbCD", "abcd",
		"Zzzz", "AbCD", "Abcd",
		"Zzzz", "AbC", "",
		"XXX", "ab ", "",
		"XXX", "usd", "USD",
		"cmn", "AB ", "",
		"gsw", "CMN", "cmn",
	}
	for i := 0; i < len(tests); i += 3 {
		form, input, want := tests[i], tests[i+1], tests[i+2]
		// FixCase rewrites the buffer in place; a false result discards it.
		buf := []byte(input)
		if ok := FixCase(form, buf); !ok {
			buf = nil
		}
		if got := string(buf); got != want {
			t.Errorf("FixCase(%q, %q) = %q; want %q", form, input, buf, want)
		}
	}
}

View file

@ -1,875 +0,0 @@
// This file is generated with "go test -tags generate". DO NOT EDIT!
// +build !generate
package triegen_test
// lookup reports the trie value stored for the UTF-8 sequence that starts s,
// together with the number of bytes of that sequence. A size of 0 means s is
// shorter than the sequence announced by its first byte. For malformed input
// the value is 0 and the size is the number of bytes accepted before the
// offending one (at least 1). s must not be empty.
func (t *randTrie) lookup(s []byte) (v uint8, sz int) {
	lead := s[0]
	if lead < 0x80 { // ASCII bytes index the value table directly.
		return randValues[lead], 1
	}
	if lead < 0xC2 || 0xF8 <= lead {
		return 0, 1 // Illegal UTF-8: not a usable starter byte.
	}
	// Length of the sequence announced by the lead byte.
	width := 2
	switch {
	case lead >= 0xF0:
		width = 4
	case lead >= 0xE0:
		width = 3
	}
	if len(s) < width {
		return 0, 0
	}
	block := uint32(randIndex[lead])
	for pos := 1; pos < width; pos++ {
		cont := s[pos]
		if cont < 0x80 || 0xC0 <= cont {
			return 0, pos // Illegal UTF-8: not a continuation byte.
		}
		if pos == width-1 {
			// The last byte selects the value inside the current block.
			return t.lookupValue(block, cont), width
		}
		// Earlier continuation bytes descend one level in the index.
		block = uint32(randIndex[block<<6+uint32(cont)])
	}
	return 0, 1 // not reached: the loop above always returns
}
// lookupUnsafe reports the trie value for the UTF-8 sequence that starts s.
// It performs no validation: s must begin with a complete, valid encoding.
func (t *randTrie) lookupUnsafe(s []byte) uint8 {
	lead := s[0]
	if lead < 0x80 { // ASCII bytes index the value table directly.
		return randValues[lead]
	}
	block := uint32(randIndex[lead])
	// Number of continuation bytes that are resolved through the index table
	// before the final byte selects the value.
	hops := 0
	switch {
	case lead >= 0xF0:
		hops = 2
	case lead >= 0xE0:
		hops = 1
	}
	for k := 1; k <= hops; k++ {
		block = uint32(randIndex[block<<6+uint32(s[k])])
	}
	if lead >= 0xF8 { // no encoding starts with such a byte
		return 0
	}
	return t.lookupValue(block, s[hops+1])
}
// lookupString is the string counterpart of lookup: it reports the trie value
// stored for the UTF-8 sequence that starts s and the number of bytes of that
// sequence. A size of 0 means s is shorter than the sequence announced by its
// first byte. For malformed input the value is 0 and the size is the number
// of bytes accepted before the offending one (at least 1). s must not be
// empty.
func (t *randTrie) lookupString(s string) (v uint8, sz int) {
	lead := s[0]
	if lead < 0x80 { // ASCII bytes index the value table directly.
		return randValues[lead], 1
	}
	if lead < 0xC2 || 0xF8 <= lead {
		return 0, 1 // Illegal UTF-8: not a usable starter byte.
	}
	// Length of the sequence announced by the lead byte.
	width := 2
	switch {
	case lead >= 0xF0:
		width = 4
	case lead >= 0xE0:
		width = 3
	}
	if len(s) < width {
		return 0, 0
	}
	block := uint32(randIndex[lead])
	for pos := 1; pos < width; pos++ {
		cont := s[pos]
		if cont < 0x80 || 0xC0 <= cont {
			return 0, pos // Illegal UTF-8: not a continuation byte.
		}
		if pos == width-1 {
			// The last byte selects the value inside the current block.
			return t.lookupValue(block, cont), width
		}
		// Earlier continuation bytes descend one level in the index.
		block = uint32(randIndex[block<<6+uint32(cont)])
	}
	return 0, 1 // not reached: the loop above always returns
}
// lookupStringUnsafe reports the trie value for the UTF-8 sequence that
// starts s. It performs no validation: s must begin with a complete, valid
// encoding.
func (t *randTrie) lookupStringUnsafe(s string) uint8 {
	lead := s[0]
	if lead < 0x80 { // ASCII bytes index the value table directly.
		return randValues[lead]
	}
	block := uint32(randIndex[lead])
	// Number of continuation bytes that are resolved through the index table
	// before the final byte selects the value.
	hops := 0
	switch {
	case lead >= 0xF0:
		hops = 2
	case lead >= 0xE0:
		hops = 1
	}
	for k := 1; k <= hops; k++ {
		block = uint32(randIndex[block<<6+uint32(s[k])])
	}
	if lead >= 0xF8 { // no encoding starts with such a byte
		return 0
	}
	return t.lookupValue(block, s[hops+1])
}
// randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f.
//
// The type carries no state; its methods read the package-level tables.
type randTrie struct{}

// newRandTrie returns a handle for the rand trie. The argument is accepted
// but not used.
func newRandTrie(i int) *randTrie {
	return new(randTrie)
}
// lookupValue returns the value stored for byte b in value block n. This trie
// has a single block type, so the entry is read straight from randValues at
// offset n*64 + b.
func (t *randTrie) lookupValue(n uint32, b byte) uint8 {
	offset := n<<6 + uint32(b)
	return randValues[offset]
}
// randValues: 56 blocks, 3584 entries, 3584 bytes
// The third block is the zero block.
//
// The table is read in two ways: lookup and its variants index it directly
// with an ASCII byte, and lookupValue reads entry n<<6+b for block number n
// and byte b, so each block spans 64 consecutive entries. Entries that are
// not listed in the literal keep the zero value.
var randValues = [3584]uint8{
// Block 0x0, offset 0x0
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0xc9: 0x0001,
// Block 0x4, offset 0x100
0x100: 0x0001,
// Block 0x5, offset 0x140
0x155: 0x0001,
// Block 0x6, offset 0x180
0x196: 0x0001,
// Block 0x7, offset 0x1c0
0x1ef: 0x0001,
// Block 0x8, offset 0x200
0x206: 0x0001,
// Block 0x9, offset 0x240
0x258: 0x0001,
// Block 0xa, offset 0x280
0x288: 0x0001,
// Block 0xb, offset 0x2c0
0x2f2: 0x0001,
// Block 0xc, offset 0x300
0x304: 0x0001,
// Block 0xd, offset 0x340
0x34b: 0x0001,
// Block 0xe, offset 0x380
0x3ba: 0x0001,
// Block 0xf, offset 0x3c0
0x3f5: 0x0001,
// Block 0x10, offset 0x400
0x41d: 0x0001,
// Block 0x11, offset 0x440
0x442: 0x0001,
// Block 0x12, offset 0x480
0x4bb: 0x0001,
// Block 0x13, offset 0x4c0
0x4e9: 0x0001,
// Block 0x14, offset 0x500
0x53e: 0x0001,
// Block 0x15, offset 0x540
0x55f: 0x0001,
// Block 0x16, offset 0x580
0x5b7: 0x0001,
// Block 0x17, offset 0x5c0
0x5d9: 0x0001,
// Block 0x18, offset 0x600
0x60e: 0x0001,
// Block 0x19, offset 0x640
0x652: 0x0001,
// Block 0x1a, offset 0x680
0x68f: 0x0001,
// Block 0x1b, offset 0x6c0
0x6dc: 0x0001,
// Block 0x1c, offset 0x700
0x703: 0x0001,
// Block 0x1d, offset 0x740
0x741: 0x0001,
// Block 0x1e, offset 0x780
0x79b: 0x0001,
// Block 0x1f, offset 0x7c0
0x7f1: 0x0001,
// Block 0x20, offset 0x800
0x833: 0x0001,
// Block 0x21, offset 0x840
0x853: 0x0001,
// Block 0x22, offset 0x880
0x8a2: 0x0001,
// Block 0x23, offset 0x8c0
0x8f8: 0x0001,
// Block 0x24, offset 0x900
0x917: 0x0001,
// Block 0x25, offset 0x940
0x945: 0x0001,
// Block 0x26, offset 0x980
0x99e: 0x0001,
// Block 0x27, offset 0x9c0
0x9fd: 0x0001,
// Block 0x28, offset 0xa00
0xa0d: 0x0001,
// Block 0x29, offset 0xa40
0xa66: 0x0001,
// Block 0x2a, offset 0xa80
0xaab: 0x0001,
// Block 0x2b, offset 0xac0
0xaea: 0x0001,
// Block 0x2c, offset 0xb00
0xb2d: 0x0001,
// Block 0x2d, offset 0xb40
0xb54: 0x0001,
// Block 0x2e, offset 0xb80
0xb90: 0x0001,
// Block 0x2f, offset 0xbc0
0xbe5: 0x0001,
// Block 0x30, offset 0xc00
0xc28: 0x0001,
// Block 0x31, offset 0xc40
0xc7c: 0x0001,
// Block 0x32, offset 0xc80
0xcbf: 0x0001,
// Block 0x33, offset 0xcc0
0xcc7: 0x0001,
// Block 0x34, offset 0xd00
0xd34: 0x0001,
// Block 0x35, offset 0xd40
0xd61: 0x0001,
// Block 0x36, offset 0xd80
0xdb9: 0x0001,
// Block 0x37, offset 0xdc0
0xdda: 0x0001,
}
// randIndex: 89 blocks, 5696 entries, 5696 bytes
// Block 0 is the zero block.
//
// lookup and its variants read entry c0 for the first byte c0 of a multi-byte
// sequence. For 3- and 4-byte sequences the result i is then followed through
// entry i<<6+c for each further non-final byte c. The number reached this way
// is handed to lookupValue as the block to read from randValues. Entries that
// are not listed in the literal keep the zero value.
var randIndex = [5696]uint8{
// Block 0x0, offset 0x0
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04,
0xea: 0x05, 0xeb: 0x06, 0xec: 0x07,
0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56,
// Block 0x4, offset 0x100
0x107: 0x01,
// Block 0x5, offset 0x140
0x16c: 0x02,
// Block 0x6, offset 0x180
0x19c: 0x03,
0x1ae: 0x04,
// Block 0x7, offset 0x1c0
0x1d8: 0x05,
0x1f7: 0x06,
// Block 0x8, offset 0x200
0x20c: 0x07,
// Block 0x9, offset 0x240
0x24a: 0x08,
// Block 0xa, offset 0x280
0x2b6: 0x09,
// Block 0xb, offset 0x2c0
0x2d5: 0x0a,
// Block 0xc, offset 0x300
0x31a: 0x0b,
// Block 0xd, offset 0x340
0x373: 0x0c,
// Block 0xe, offset 0x380
0x38b: 0x0d,
// Block 0xf, offset 0x3c0
0x3f0: 0x0e,
// Block 0x10, offset 0x400
0x433: 0x0f,
// Block 0x11, offset 0x440
0x45d: 0x10,
// Block 0x12, offset 0x480
0x491: 0x08, 0x494: 0x09, 0x497: 0x0a,
0x49b: 0x0b, 0x49c: 0x0c,
0x4a1: 0x0d,
0x4ad: 0x0e,
0x4ba: 0x0f,
// Block 0x13, offset 0x4c0
0x4c1: 0x11,
// Block 0x14, offset 0x500
0x531: 0x12,
// Block 0x15, offset 0x540
0x546: 0x13,
// Block 0x16, offset 0x580
0x5ab: 0x14,
// Block 0x17, offset 0x5c0
0x5d4: 0x11,
0x5fe: 0x11,
// Block 0x18, offset 0x600
0x618: 0x0a,
// Block 0x19, offset 0x640
0x65b: 0x15,
// Block 0x1a, offset 0x680
0x6a0: 0x16,
// Block 0x1b, offset 0x6c0
0x6d2: 0x17,
0x6f6: 0x18,
// Block 0x1c, offset 0x700
0x711: 0x19,
// Block 0x1d, offset 0x740
0x768: 0x1a,
// Block 0x1e, offset 0x780
0x783: 0x1b,
// Block 0x1f, offset 0x7c0
0x7f9: 0x1c,
// Block 0x20, offset 0x800
0x831: 0x1d,
// Block 0x21, offset 0x840
0x85e: 0x1e,
// Block 0x22, offset 0x880
0x898: 0x1f,
// Block 0x23, offset 0x8c0
0x8c7: 0x18,
0x8d5: 0x14,
0x8f7: 0x20,
0x8fe: 0x1f,
// Block 0x24, offset 0x900
0x905: 0x21,
// Block 0x25, offset 0x940
0x966: 0x03,
// Block 0x26, offset 0x980
0x981: 0x07, 0x983: 0x11,
0x989: 0x12, 0x98a: 0x13, 0x98e: 0x14, 0x98f: 0x15,
0x992: 0x16, 0x995: 0x17, 0x996: 0x18,
0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b, 0x99f: 0x1c,
0x9a3: 0x1d,
0x9ad: 0x1e, 0x9af: 0x1f,
0x9b0: 0x20, 0x9b1: 0x21,
0x9b8: 0x22, 0x9bd: 0x23,
// Block 0x27, offset 0x9c0
0x9cd: 0x22,
// Block 0x28, offset 0xa00
0xa0c: 0x08,
// Block 0x29, offset 0xa40
0xa6f: 0x1c,
// Block 0x2a, offset 0xa80
0xa90: 0x1a,
0xaaf: 0x23,
// Block 0x2b, offset 0xac0
0xae3: 0x19,
0xae8: 0x24,
0xafc: 0x25,
// Block 0x2c, offset 0xb00
0xb13: 0x26,
// Block 0x2d, offset 0xb40
0xb67: 0x1c,
// Block 0x2e, offset 0xb80
0xb8f: 0x0b,
// Block 0x2f, offset 0xbc0
0xbcb: 0x27,
0xbe7: 0x26,
// Block 0x30, offset 0xc00
0xc34: 0x16,
// Block 0x31, offset 0xc40
0xc62: 0x03,
// Block 0x32, offset 0xc80
0xcbb: 0x12,
// Block 0x33, offset 0xcc0
0xcdf: 0x09,
// Block 0x34, offset 0xd00
0xd34: 0x0a,
// Block 0x35, offset 0xd40
0xd41: 0x1e,
// Block 0x36, offset 0xd80
0xd83: 0x28,
// Block 0x37, offset 0xdc0
0xdc0: 0x15,
// Block 0x38, offset 0xe00
0xe1a: 0x15,
// Block 0x39, offset 0xe40
0xe65: 0x29,
// Block 0x3a, offset 0xe80
0xe86: 0x1f,
// Block 0x3b, offset 0xec0
0xeec: 0x18,
// Block 0x3c, offset 0xf00
0xf28: 0x2a,
// Block 0x3d, offset 0xf40
0xf53: 0x08,
// Block 0x3e, offset 0xf80
0xfa2: 0x2b,
0xfaa: 0x17,
// Block 0x3f, offset 0xfc0
0xfc0: 0x25, 0xfc2: 0x26,
0xfc9: 0x27, 0xfcd: 0x28, 0xfce: 0x29,
0xfd5: 0x2a,
0xfd8: 0x2b, 0xfd9: 0x2c, 0xfdf: 0x2d,
0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30, 0xfe6: 0x31,
0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34, 0xfef: 0x35,
0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38, 0xff4: 0x39,
0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c,
// Block 0x40, offset 0x1000
0x102c: 0x2c,
// Block 0x41, offset 0x1040
0x1074: 0x2c,
// Block 0x42, offset 0x1080
0x108c: 0x08,
0x10a0: 0x2d,
// Block 0x43, offset 0x10c0
0x10e8: 0x10,
// Block 0x44, offset 0x1100
0x110f: 0x13,
// Block 0x45, offset 0x1140
0x114b: 0x2e,
// Block 0x46, offset 0x1180
0x118b: 0x23,
0x119d: 0x0c,
// Block 0x47, offset 0x11c0
0x11c3: 0x12,
0x11f9: 0x0f,
// Block 0x48, offset 0x1200
0x121e: 0x1b,
// Block 0x49, offset 0x1240
0x1270: 0x2f,
// Block 0x4a, offset 0x1280
0x128a: 0x1b,
0x12a7: 0x02,
// Block 0x4b, offset 0x12c0
0x12fb: 0x14,
// Block 0x4c, offset 0x1300
0x1333: 0x30,
// Block 0x4d, offset 0x1340
0x134d: 0x31,
// Block 0x4e, offset 0x1380
0x138e: 0x15,
// Block 0x4f, offset 0x13c0
0x13f4: 0x32,
// Block 0x50, offset 0x1400
0x141b: 0x33,
// Block 0x51, offset 0x1440
0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41,
0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45,
0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49, 0x146e: 0x4a,
0x1472: 0x4b, 0x1473: 0x4c,
0x1479: 0x4d, 0x147b: 0x4e,
// Block 0x52, offset 0x1480
0x1480: 0x34,
0x1499: 0x11,
0x14b6: 0x2c,
// Block 0x53, offset 0x14c0
0x14e4: 0x0d,
// Block 0x54, offset 0x1500
0x1527: 0x08,
// Block 0x55, offset 0x1540
0x1555: 0x2b,
// Block 0x56, offset 0x1580
0x15b2: 0x35,
// Block 0x57, offset 0x15c0
0x15f2: 0x1c, 0x15f4: 0x29,
// Block 0x58, offset 0x1600
0x1600: 0x50, 0x1603: 0x51,
0x1608: 0x52, 0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55,
}
// lookup reports the trie value stored for the UTF-8 sequence that starts s,
// together with the number of bytes of that sequence. A size of 0 means s is
// shorter than the sequence announced by its first byte. For malformed input
// the value is 0 and the size is the number of bytes accepted before the
// offending one (at least 1). s must not be empty.
func (t *multiTrie) lookup(s []byte) (v uint64, sz int) {
	lead := s[0]
	if lead < 0x80 { // ASCII bytes index this trie's ascii table directly.
		return t.ascii[lead], 1
	}
	if lead < 0xC2 || 0xF8 <= lead {
		return 0, 1 // Illegal UTF-8: not a usable starter byte.
	}
	// Length of the sequence announced by the lead byte.
	width := 2
	switch {
	case lead >= 0xF0:
		width = 4
	case lead >= 0xE0:
		width = 3
	}
	if len(s) < width {
		return 0, 0
	}
	// The first level is per trie; deeper levels use the shared index table.
	block := uint32(t.utf8Start[lead])
	for pos := 1; pos < width; pos++ {
		cont := s[pos]
		if cont < 0x80 || 0xC0 <= cont {
			return 0, pos // Illegal UTF-8: not a continuation byte.
		}
		if pos == width-1 {
			// The last byte selects the value inside the current block.
			return t.lookupValue(block, cont), width
		}
		block = uint32(multiIndex[block<<6+uint32(cont)])
	}
	return 0, 1 // not reached: the loop above always returns
}
// lookupUnsafe reports the trie value for the UTF-8 sequence that starts s.
// It performs no validation: s must begin with a complete, valid encoding.
func (t *multiTrie) lookupUnsafe(s []byte) uint64 {
	lead := s[0]
	if lead < 0x80 { // ASCII bytes index this trie's ascii table directly.
		return t.ascii[lead]
	}
	// The first level is per trie; deeper levels use the shared index table.
	block := uint32(t.utf8Start[lead])
	// Number of continuation bytes that are resolved through the index table
	// before the final byte selects the value.
	hops := 0
	switch {
	case lead >= 0xF0:
		hops = 2
	case lead >= 0xE0:
		hops = 1
	}
	for k := 1; k <= hops; k++ {
		block = uint32(multiIndex[block<<6+uint32(s[k])])
	}
	if lead >= 0xF8 { // no encoding starts with such a byte
		return 0
	}
	return t.lookupValue(block, s[hops+1])
}
// lookupString is the string counterpart of lookup: it reports the trie value
// stored for the UTF-8 sequence that starts s and the number of bytes of that
// sequence. A size of 0 means s is shorter than the sequence announced by its
// first byte. For malformed input the value is 0 and the size is the number
// of bytes accepted before the offending one (at least 1). s must not be
// empty.
func (t *multiTrie) lookupString(s string) (v uint64, sz int) {
	lead := s[0]
	if lead < 0x80 { // ASCII bytes index this trie's ascii table directly.
		return t.ascii[lead], 1
	}
	if lead < 0xC2 || 0xF8 <= lead {
		return 0, 1 // Illegal UTF-8: not a usable starter byte.
	}
	// Length of the sequence announced by the lead byte.
	width := 2
	switch {
	case lead >= 0xF0:
		width = 4
	case lead >= 0xE0:
		width = 3
	}
	if len(s) < width {
		return 0, 0
	}
	// The first level is per trie; deeper levels use the shared index table.
	block := uint32(t.utf8Start[lead])
	for pos := 1; pos < width; pos++ {
		cont := s[pos]
		if cont < 0x80 || 0xC0 <= cont {
			return 0, pos // Illegal UTF-8: not a continuation byte.
		}
		if pos == width-1 {
			// The last byte selects the value inside the current block.
			return t.lookupValue(block, cont), width
		}
		block = uint32(multiIndex[block<<6+uint32(cont)])
	}
	return 0, 1 // not reached: the loop above always returns
}
// lookupStringUnsafe reports the trie value for the UTF-8 sequence that
// starts s. It performs no validation: s must begin with a complete, valid
// encoding.
func (t *multiTrie) lookupStringUnsafe(s string) uint64 {
	lead := s[0]
	if lead < 0x80 { // ASCII bytes index this trie's ascii table directly.
		return t.ascii[lead]
	}
	// The first level is per trie; deeper levels use the shared index table.
	block := uint32(t.utf8Start[lead])
	// Number of continuation bytes that are resolved through the index table
	// before the final byte selects the value.
	hops := 0
	switch {
	case lead >= 0xF0:
		hops = 2
	case lead >= 0xE0:
		hops = 1
	}
	for k := 1; k <= hops; k++ {
		block = uint32(multiIndex[block<<6+uint32(s[k])])
	}
	if lead >= 0xF8 { // no encoding starts with such a byte
		return 0
	}
	return t.lookupValue(block, s[hops+1])
}
// multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e.
//
// A multiTrie is a view onto the shared multiValues and multiIndex tables for
// one of the tries listed in multiTrieHandles; newMultiTrie builds it.
type multiTrie struct {
// ascii is a suffix of multiValues; the lookup methods read it directly for
// bytes below 0x80.
ascii []uint64 // index for ASCII bytes
// utf8Start is a suffix of multiIndex; the lookup methods index it with the
// first byte of a multi-byte sequence.
utf8Start []uint8 // index for UTF-8 bytes >= 0xC0
}
// newMultiTrie returns the trie described by multiTrieHandles[i]. The handle
// holds block numbers, so each is scaled by 64 to obtain the offset at which
// this trie's view of the shared tables begins.
func newMultiTrie(i int) *multiTrie {
	handle := multiTrieHandles[i]
	valueOff := uint32(handle.ascii) << 6
	indexOff := uint32(handle.multi) << 6
	return &multiTrie{
		ascii:     multiValues[valueOff:],
		utf8Start: multiIndex[indexOff:],
	}
}
// multiTrieHandle records where one trie begins in the shared tables. Both
// fields are block numbers; newMultiTrie multiplies them by 64 to slice
// multiValues (ascii) and multiIndex (multi).
type multiTrieHandle struct {
	ascii uint8
	multi uint8
}

// multiTrieHandles: 5 handles, 10 bytes
var multiTrieHandles = [5]multiTrieHandle{
	{ascii: 0, multi: 0},   // 8c1e77823143d35c: all
	{ascii: 0, multi: 23},  // 8fb58ff8243b45b0: ASCII only
	{ascii: 0, multi: 23},  // 8fb58ff8243b45b0: ASCII only 2
	{ascii: 0, multi: 24},  // 2ccc43994f11046f: BMP only
	{ascii: 30, multi: 25}, // ce448591bdcb4733: No BMP
}
// lookupValue returns the value stored for byte b in value block n. This trie
// has a single block type, so the entry is read straight from multiValues at
// offset n*64 + b.
func (t *multiTrie) lookupValue(n uint32, b byte) uint64 {
	offset := n<<6 + uint32(b)
	return multiValues[offset]
}
// multiValues: 32 blocks, 2048 entries, 16384 bytes
// The third block is the zero block.
//
// The table is shared by every trie in multiTrieHandles. lookupValue reads
// entry n<<6+b for block number n and byte b, so each block spans 64
// consecutive entries, and newMultiTrie slices the table at a handle's ascii
// block to form that trie's ASCII table. Entries that are not listed in the
// literal keep the zero value.
var multiValues = [2048]uint64{
// Block 0x0, offset 0x0
0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b, 0x05: 0x2948319393e7ef10,
0x07: 0x20f03b006704f663, 0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551,
0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92, 0x11: 0x6a81c9c0ce86e884,
0x1f: 0xa03e77aac8be79b, 0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8,
0x3f: 0x4fd3bcfa72bce8b0,
// Block 0x1, offset 0x40
0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357,
0x7f: 0x782caa2d25a418a9,
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4,
// Block 0x4, offset 0x100
0x13f: 0x56f8c4c82f5962dc,
// Block 0x5, offset 0x140
0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d,
// Block 0x6, offset 0x180
0x1bf: 0x7bf4d0ebf302a088,
// Block 0x7, offset 0x1c0
0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7,
// Block 0x8, offset 0x200
0x23f: 0x5de81c1dff6bf29d,
// Block 0x9, offset 0x240
0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3,
// Block 0xa, offset 0x280
0x2bf: 0x6a28f01979cbf059,
// Block 0xb, offset 0x2c0
0x2c0: 0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c,
// Block 0xc, offset 0x300
0x33f: 0x5a10ffa9e29184fb,
// Block 0xd, offset 0x340
0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79,
// Block 0xe, offset 0x380
0x3bf: 0x74071288fff39c76,
// Block 0xf, offset 0x3c0
0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849,
// Block 0x10, offset 0x400
0x43f: 0x5676a62fd49c6bec,
// Block 0x11, offset 0x440
0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f,
// Block 0x12, offset 0x480
0x4bf: 0x69d6f0fe711fafc9,
// Block 0x13, offset 0x4c0
0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02,
// Block 0x14, offset 0x500
0x53f: 0xe03b31814c95f8b,
// Block 0x15, offset 0x540
0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc,
// Block 0x16, offset 0x580
0x5bf: 0x3c02ea92fb168559,
// Block 0x17, offset 0x5c0
0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645,
// Block 0x18, offset 0x600
0x63f: 0x3bb2ed2a72748f4b,
// Block 0x19, offset 0x640
0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6,
// Block 0x1a, offset 0x680
0x6bf: 0x352711cfb7236418,
// Block 0x1b, offset 0x6c0
0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1,
// Block 0x1c, offset 0x700
0x73f: 0x7191a77b28d23110,
// Block 0x1d, offset 0x740
0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de,
// Block 0x1e, offset 0x780
// Block 0x1f, offset 0x7c0
}
// multiIndex: 29 blocks, 1856 entries, 1856 bytes
// Block 0 is the zero block.
//
// The table is shared by every trie in multiTrieHandles. newMultiTrie slices
// it at a handle's multi block to form that trie's utf8Start table, and the
// lookup methods read entry i<<6+c to follow each non-final continuation byte
// c from index i. Entries that are not listed in the literal keep the zero
// value.
var multiIndex = [1856]uint8{
// Block 0x0, offset 0x0
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04,
0xc8: 0x05, 0xcf: 0x06,
0xd0: 0x07,
0xdf: 0x08,
0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07,
0xe8: 0x08, 0xef: 0x09,
0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17,
// Block 0x4, offset 0x100
0x120: 0x09,
0x13f: 0x0a,
// Block 0x5, offset 0x140
0x140: 0x0b,
0x17f: 0x0c,
// Block 0x6, offset 0x180
0x180: 0x0d,
// Block 0x7, offset 0x1c0
0x1ff: 0x0e,
// Block 0x8, offset 0x200
0x200: 0x0f,
// Block 0x9, offset 0x240
0x27f: 0x10,
// Block 0xa, offset 0x280
0x280: 0x11,
// Block 0xb, offset 0x2c0
0x2ff: 0x12,
// Block 0xc, offset 0x300
0x300: 0x13,
// Block 0xd, offset 0x340
0x37f: 0x14,
// Block 0xe, offset 0x380
0x380: 0x15,
// Block 0xf, offset 0x3c0
0x3ff: 0x16,
// Block 0x10, offset 0x400
0x410: 0x0a,
0x41f: 0x0b,
0x420: 0x0c,
0x43f: 0x0d,
// Block 0x11, offset 0x440
0x440: 0x17,
// Block 0x12, offset 0x480
0x4bf: 0x18,
// Block 0x13, offset 0x4c0
0x4c0: 0x0f,
0x4ff: 0x10,
// Block 0x14, offset 0x500
0x500: 0x19,
// Block 0x15, offset 0x540
0x540: 0x12,
// Block 0x16, offset 0x580
0x5bf: 0x1a,
// Block 0x17, offset 0x5c0
0x5ff: 0x14,
// Block 0x18, offset 0x600
0x600: 0x1b,
// Block 0x19, offset 0x640
0x640: 0x16,
// Block 0x1a, offset 0x680
// Block 0x1b, offset 0x6c0
0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04,
0x6c8: 0x05, 0x6cf: 0x06,
0x6d0: 0x07,
0x6df: 0x08,
0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07,
0x6e8: 0x08, 0x6ef: 0x09,
// Block 0x1c, offset 0x700
0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17,
}

View file

@ -1,71 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen_test
import (
"fmt"
"io"
"io/ioutil"
"golang.org/x/text/internal/triegen"
)
// ExampleCompacter generates the same trie twice, first with the default
// block layout and then with a custom Compacter, and prints the size reported
// by each run.
func ExampleCompacter() {
	t := triegen.NewTrie("root")
	for r := rune(0); r < 10000; r += 64 {
		t.Insert(r, 0x9015BADA55^uint64(r))
	}
	// The generated code is discarded; only the reported size is of interest,
	// so the error result is deliberately ignored.
	sz, _ := t.Gen(ioutil.Discard)

	fmt.Printf("Size normal: %5d\n", sz)

	var c myCompacter
	sz, _ = t.Gen(ioutil.Discard, triegen.Compact(&c))

	fmt.Printf("Size compacted: %5d\n", sz)

	// %5d right-aligns the number in five columns, so the four-digit size is
	// preceded by one pad space in addition to the space in the format string.

	// Output:
	// Size normal: 81344
	// Size compacted:  3224
}
// A myCompacter accepts a block if only the first value is given. It keeps
// the first value of every block it stores, in the order the blocks arrive.
type myCompacter []uint64

// Size reports whether values can be stored by this compacter and, if so, how
// many bytes the stored form takes. A block is accepted only when every entry
// after the first is zero; an empty block is rejected.
func (c *myCompacter) Size(values []uint64) (sz int, ok bool) {
	if len(values) == 0 {
		// Nothing to take a first value from; slicing values[1:] would panic.
		return 0, false
	}
	for _, v := range values[1:] {
		if v != 0 {
			return 0, false
		}
	}
	return 8, true // the size of a uint64
}

// Store appends the first value of v to the compacter and returns the
// position at which it was stored. v must not be empty.
func (c *myCompacter) Store(v []uint64) uint32 {
	x := uint32(len(*c))
	*c = append(*c, v[0])
	return x
}

// Print writes the stored values to w as the Go slice literal firstValue.
// It stops at, and returns, the first error reported by w.
func (c *myCompacter) Print(w io.Writer) error {
	if _, err := fmt.Fprintln(w, "var firstValue = []uint64{"); err != nil {
		return err
	}
	for _, v := range *c {
		if _, err := fmt.Fprintf(w, "\t%#x,\n", v); err != nil {
			return err
		}
	}
	_, err := fmt.Fprintln(w, "}")
	return err
}

// Handler returns the name of the function that looks up values in blocks
// stored by this compacter. getFirstValue is included along with the
// generated code:
//
//	func getFirstValue(n uint32, b byte) uint64 {
//		if b == 0x80 { // the first continuation byte
//			return firstValue[n]
//		}
//		return 0
//	}
func (c *myCompacter) Handler() string {
	return "getFirstValue"
}

View file

@ -1,148 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen_test
import (
"fmt"
"io/ioutil"
"math/rand"
"unicode"
"golang.org/x/text/internal/triegen"
)
// seed is the fixed seed handed to rand.NewSource by randomRunes and
// runeValues, so that both helpers yield the same data on every call.
const seed = 0x12345
// genWriter receives the code generated by Example_build and
// ExampleGen_build. It discards the output by default; the generator's init
// function (built with -tags=generate) points it at a file instead.
var genWriter = ioutil.Discard
// randomRunes returns a set of 100 pseudo-random runes, each mapped to the
// value 1. The generator is seeded with the package constant seed, so the
// result is the same on every call.
func randomRunes() map[rune]uint8 {
	const wanted = 100

	picked := make(map[rune]uint8)
	src := rand.New(rand.NewSource(seed))
	for len(picked) < wanted {
		candidate := rune(src.Int31n(unicode.MaxRune + 1))
		// Round-trip through a string: a candidate that is not a valid
		// Unicode code point comes back as a different rune and is skipped.
		if decoded := []rune(string(candidate)); decoded[0] != candidate {
			continue
		}
		picked[candidate] = 1
	}
	return picked
}
// Example_build demonstrates building a basic trie: each of the 100 runes
// returned by randomRunes is inserted with the value 1, after which the trie
// is generated into genWriter and its size and error are printed.
func Example_build() {
	randTrie := triegen.NewTrie("rand")
	for r := range randomRunes() {
		randTrie.Insert(r, 1)
	}

	size, genErr := randTrie.Gen(genWriter)
	fmt.Printf("Trie size: %d bytes\n", size)
	fmt.Printf("Error: %v\n", genErr)

	// Output:
	// Trie size: 9280 bytes
	// Error: <nil>
}
// Example_lookup shows how the trie produced by Example_build is queried: for
// every rune up to unicode.MaxRune the looked-up value must equal the value
// assigned by randomRunes (zero for runes that were never inserted).
func Example_lookup() {
	randTrie := newRandTrie(0)

	// The same set of runes used by Example_build.
	expected := randomRunes()

	// allMatch reports whether the trie agrees with expected for all runes.
	allMatch := func() bool {
		for r := rune(0); r <= unicode.MaxRune; r++ {
			// Note that the return type of lookup is uint8.
			got, _ := randTrie.lookupString(string(r))
			if got != expected[r] {
				return false
			}
		}
		return true
	}

	if !allMatch() {
		fmt.Println("FAILURE")
		return
	}
	fmt.Println("SUCCESS")

	// Output:
	// SUCCESS
}
// runeValues assigns a pseudo-random value to each rune lying directly below,
// at, or directly above every power of two from 4 up to unicode.MaxRune. The
// generator is seeded with the package constant seed, so the mapping is
// reproducible.
func runeValues() map[rune]uint64 {
	values := make(map[rune]uint64)
	src := rand.New(rand.NewSource(seed))
	for pow := 4; pow <= unicode.MaxRune; pow *= 2 {
		for _, delta := range [...]int{-1, 0, 1} {
			values[rune(pow+delta)] = uint64(src.Int63())
		}
	}
	return values
}
// ExampleGen_build demonstrates the creation of multiple tries sharing common
// blocks. ExampleGen_lookup demonstrates how to use the generated tries.
func ExampleGen_build() {
var tries []*triegen.Trie
rv := runeValues()
for _, c := range []struct {
include func(rune) bool
name string
}{
{func(r rune) bool { return true }, "all"},
{func(r rune) bool { return r < 0x80 }, "ASCII only"},
{func(r rune) bool { return r < 0x80 }, "ASCII only 2"},
{func(r rune) bool { return r <= 0xFFFF }, "BMP only"},
{func(r rune) bool { return r > 0xFFFF }, "No BMP"},
} {
t := triegen.NewTrie(c.name)
tries = append(tries, t)
for r, v := range rv {
if c.include(r) {
t.Insert(r, v)
}
}
}
sz, err := triegen.Gen(genWriter, "multi", tries)
fmt.Printf("Trie size: %d bytes\n", sz)
fmt.Printf("Error: %v\n", err)
// Output:
// Trie size: 18250 bytes
// Error: <nil>
}
// ExampleGen_lookup queries each of the tries generated by ExampleGen_build
// and checks, for every rune up to unicode.MaxRune, that the stored value
// equals the runeValues entry when the rune belongs to that trie's subset and
// zero otherwise.
func ExampleGen_lookup() {
	values := runeValues()

	// One filter per trie, in the order the tries were built.
	filters := []func(rune) bool{
		func(rune) bool { return true },          // all
		func(r rune) bool { return r < 0x80 },    // ASCII only
		func(r rune) bool { return r < 0x80 },    // ASCII only 2
		func(r rune) bool { return r <= 0xFFFF }, // BMP only
		func(r rune) bool { return r > 0xFFFF },  // No BMP
	}

	for idx, keep := range filters {
		multi := newMultiTrie(idx)
		for r := rune(0); r <= unicode.MaxRune; r++ {
			var expected uint64
			if keep(r) {
				expected = values[r]
			}
			// As we convert from a valid rune, we know it is safe to use
			// lookupStringUnsafe.
			if got := multi.lookupStringUnsafe(string(r)); got != expected {
				fmt.Println("FAILURE")
				return
			}
		}
	}
	fmt.Println("SUCCESS")

	// Output:
	// SUCCESS
}

View file

@ -1,68 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build generate
package triegen_test
// The code in this file captures the tries generated in the examples and
// writes them to data_test.go. To invoke it, run:
// go test -tags=generate
//
// Making the generation code a "test" allows us to link in the necessary test
// code.
import (
"log"
"os"
"os/exec"
)
// init regenerates data_test.go. It redirects genWriter to a temporary file,
// runs the two build examples so that they emit their tries into it, formats
// the result with gofmt, moves it over data_test.go and then exits the
// process, so no tests run in a build that includes this file.
func init() {
	const tmpfile = "tmpout"
	const dstfile = "data_test.go"

	// fatal removes the temporary file and aborts. Cleanup is done explicitly
	// because log.Fatalf and os.Exit terminate the process without running
	// deferred calls.
	fatal := func(format string, args ...interface{}) {
		os.Remove(tmpfile)
		log.Fatalf(format, args...)
	}

	f, err := os.Create(tmpfile)
	if err != nil {
		log.Fatalf("Could not create output file: %v", err)
	}

	// We exit before this function returns, regardless of success or failure,
	// so there's no need to save (and later restore) the existing genWriter
	// value.
	genWriter = f

	if _, err := f.Write([]byte(header)); err != nil {
		f.Close()
		fatal("Could not write header: %v", err)
	}

	Example_build()
	ExampleGen_build()

	// Close the file before gofmt rewrites it and before it is renamed.
	if err := f.Close(); err != nil {
		fatal("Could not close output file: %v", err)
	}
	if err := exec.Command("gofmt", "-w", tmpfile).Run(); err != nil {
		fatal("%v", err)
	}
	// A missing destination is fine; any other removal error is not.
	if err := os.Remove(dstfile); err != nil && !os.IsNotExist(err) {
		fatal("Could not remove %s: %v", dstfile, err)
	}
	if err := os.Rename(tmpfile, dstfile); err != nil {
		fatal("Could not move output into place: %v", err)
	}
	os.Exit(0)
}
// header is written at the top of the generated data_test.go. The blank line
// after the +build line is required: without it the line is read as part of
// the package documentation rather than as a build constraint, and the
// generated file would no longer be excluded from builds using -tags generate.
const header = `// This file is generated with "go test -tags generate". DO NOT EDIT!
// +build !generate

package triegen_test
`
// Placeholder versions of the generated tries. data_test.go is excluded when
// the generate flag is set, so these stand-ins keep the package compiling.
// The tests will clearly fail against them, but that is okay: they only
// exist to allow bootstrapping.
type trie struct{}

// lookupString is a stub that always reports the value 0 and a size of 1.
func (t *trie) lookupString(string) (uint8, int) {
	var value uint8
	return value, 1
}

// lookupStringUnsafe is a stub that always reports the value 0.
func (t *trie) lookupStringUnsafe(string) uint64 {
	var value uint64
	return value
}

// newRandTrie returns a stub trie; the index is ignored.
func newRandTrie(i int) *trie {
	return new(trie)
}

// newMultiTrie returns a stub trie; the index is ignored.
func newMultiTrie(i int) *trie {
	return new(trie)
}

View file

@ -1,81 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ucd_test
import (
"fmt"
"strings"
"golang.org/x/text/internal/ucd"
)
// Example parses two embedded excerpts: it walks the UnicodeData excerpt rune
// by rune, printing every simple lowercase mapping and the number of runes
// visited, and then walks the Scripts excerpt with ucd.KeepRanges, printing
// each range together with its script name.
func Example() {
	// Read rune-by-rune from UnicodeData.
	runeParser := ucd.New(strings.NewReader(unicodeData))
	visited := 0
	for runeParser.Next() {
		visited++
		lower := runeParser.Runes(ucd.SimpleLowercaseMapping)
		if lower == nil {
			continue
		}
		fmt.Printf("lower(%U) -> %U\n", runeParser.Rune(0), lower[0])
	}
	if err := runeParser.Err(); err != nil {
		fmt.Println(err)
	}
	fmt.Println("Number of runes visited:", visited)

	// Read raw ranges from Scripts.
	rangeParser := ucd.New(strings.NewReader(scripts), ucd.KeepRanges)
	for rangeParser.Next() {
		first, last := rangeParser.Range(0)
		fmt.Printf("%04X..%04X: %s\n", first, last, rangeParser.String(1))
	}
	if err := rangeParser.Err(); err != nil {
		fmt.Println(err)
	}

	// Output:
	// lower(U+00C0) -> U+00E0
	// lower(U+00C1) -> U+00E1
	// lower(U+00C2) -> U+00E2
	// lower(U+00C3) -> U+00E3
	// lower(U+00C4) -> U+00E4
	// Number of runes visited: 6594
	// 0000..001F: Common
	// 0020..0020: Common
	// 0021..0023: Common
	// 0024..0024: Common
}
// unicodeData is an excerpt from UnicodeData.txt that Example feeds to the
// rune-by-rune parser. It holds twelve individual entries (U+00B9..U+00C4),
// five of which carry a simple lowercase mapping, plus one First/Last pair
// for U+3400..U+4DB5. Example's expected count of 6594 visited runes equals
// those twelve entries plus every rune in that range.
const unicodeData = `
00B9;SUPERSCRIPT ONE;No;0;EN;<super> 0031;;1;1;N;SUPERSCRIPT DIGIT ONE;;;;
00BA;MASCULINE ORDINAL INDICATOR;Lo;0;L;<super> 006F;;;;N;;;;;
00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK;Pf;0;ON;;;;;Y;RIGHT POINTING GUILLEMET;;;;
00BC;VULGAR FRACTION ONE QUARTER;No;0;ON;<fraction> 0031 2044 0034;;;1/4;N;FRACTION ONE QUARTER;;;;
00BD;VULGAR FRACTION ONE HALF;No;0;ON;<fraction> 0031 2044 0032;;;1/2;N;FRACTION ONE HALF;;;;
00BE;VULGAR FRACTION THREE QUARTERS;No;0;ON;<fraction> 0033 2044 0034;;;3/4;N;FRACTION THREE QUARTERS;;;;
00BF;INVERTED QUESTION MARK;Po;0;ON;;;;;N;;;;;
00C0;LATIN CAPITAL LETTER A WITH GRAVE;Lu;0;L;0041 0300;;;;N;LATIN CAPITAL LETTER A GRAVE;;;00E0;
00C1;LATIN CAPITAL LETTER A WITH ACUTE;Lu;0;L;0041 0301;;;;N;LATIN CAPITAL LETTER A ACUTE;;;00E1;
00C2;LATIN CAPITAL LETTER A WITH CIRCUMFLEX;Lu;0;L;0041 0302;;;;N;LATIN CAPITAL LETTER A CIRCUMFLEX;;;00E2;
00C3;LATIN CAPITAL LETTER A WITH TILDE;Lu;0;L;0041 0303;;;;N;LATIN CAPITAL LETTER A TILDE;;;00E3;
00C4;LATIN CAPITAL LETTER A WITH DIAERESIS;Lu;0;L;0041 0308;;;;N;LATIN CAPITAL LETTER A DIAERESIS;;;00E4;
# A legacy rune range.
3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
`
// scripts is an excerpt from Scripts.txt that Example parses with
// ucd.KeepRanges. Each data line gives a code point or code point range,
// then the script name after the semicolon, then a trailing '#' comment.
const scripts = `
# Property: Script
# ================================================
0000..001F ; Common # Cc [32] <control-0000>..<control-001F>
0020 ; Common # Zs SPACE
0021..0023 ; Common # Po [3] EXCLAMATION MARK..NUMBER SIGN
0024 ; Common # Sc DOLLAR SIGN
`

View file

@ -1,105 +0,0 @@
package ucd
import (
"strings"
"testing"
)
// file is the UCD-formatted input parsed by TestGetters. As its own header
// line states, the fields of each record are: rune (or rune range), bool,
// uint, int, float and a list of runes. It also contains comment lines and
// two "@Part" lines, which TestGetters expects to reach its part handler,
// the second one carrying a comment.
const file = `
# Comments should be skipped
# rune; bool; uint; int; float; runes; # Y
0..0005; Y; 0; 2; -5.25 ; 0 1 2 3 4 5;
6..0007; Yes ; 6; 1; -4.25 ; 0006 0007;
8; T ; 8 ; 0 ;-3.25 ;;# T
9; True ;9 ; -1;-2.25 ; 0009;
# more comments to be ignored
@Part0
A; N; 10 ; -2; -1.25; ;# N
B; No; 11 ; -3; -0.25;
C; False;12; -4; 0.75;
D; ;13;-5;1.75;
@Part1 # Another part.
# We test part comments get removed by not commenting the the next line.
E..10FFFF; F; 14 ; -6; 2.75;
`
// want lists, in input order, the rune range that TestGetters expects for
// field 0 of each record in file.
var want = []struct {
	start, end rune
}{
	{start: 0x00, end: 0x05},
	{start: 0x06, end: 0x07},
	{start: 0x08, end: 0x08},
	{start: 0x09, end: 0x09},
	{start: 0x0A, end: 0x0A},
	{start: 0x0B, end: 0x0B},
	{start: 0x0C, end: 0x0C},
	{start: 0x0D, end: 0x0D},
	{start: 0x0E, end: 0x10FFFF},
}
// TestGetters parses the file fixture with KeepRanges and a Part handler and
// checks every typed getter of Parser against the values implied by the
// fixture: the range in field 0 against want, and fields 1-5 against simple
// functions of the record index.
func TestGetters(t *testing.T) {
	// Expected name and comment of each "@Part" line, in input order. The
	// handler consumes one entry per invocation.
	parts := [][2]string{
		{"Part0", ""},
		{"Part1", "Another part."},
	}
	handler := func(p *Parser) {
		if len(parts) == 0 {
			t.Error("Part handler invoked too many times.")
			return
		}
		wantPart := parts[0]
		parts = parts[1:]
		if got0, got1 := p.String(0), p.Comment(); got0 != wantPart[0] || got1 != wantPart[1] {
			t.Errorf("part: got %q, %q; want %q, %q", got0, got1, wantPart[0], wantPart[1])
		}
	}

	p := New(strings.NewReader(file), KeepRanges, Part(handler))
	i := 0
	for ; p.Next(); i++ {
		// Fail with a clear message instead of an index-out-of-range panic.
		if i >= len(want) {
			t.Fatalf("parser returned more than the %d expected records", len(want))
		}
		start, end := p.Range(0)
		w := want[i]
		if start != w.start || end != w.end {
			t.Fatalf("%d:Range(0); got %#x..%#x; want %#x..%#x", i, start, end, w.start, w.end)
		}
		if w.start == w.end && p.Rune(0) != w.start {
			t.Errorf("%d:Range(0).start: got %U; want %U", i, p.Rune(0), w.start)
		}
		// Only the records starting at 0..9 carry a true value in field 1.
		if got, exp := p.Bool(1), w.start <= 9; got != exp {
			t.Errorf("%d:Bool(1): got %v; want %v", i, got, exp)
		}
		// Field 4 holds a float, so reading it as a rune must fail.
		if got := p.Rune(4); got != 0 || p.Err() == nil {
			t.Errorf("%d:Rune(%q): got %U, err %v; want 0 and an error", i, p.String(4), got, p.Err())
		}
		// Clear the error provoked above so that it affects neither the
		// remaining getters nor the final p.Err() check.
		p.err = nil
		if got := p.Uint(2); rune(got) != start {
			t.Errorf("%d:Uint(2): got %v; want %v", i, got, start)
		}
		if got, exp := p.Int(3), 2-i; got != exp {
			t.Errorf("%d:Int(3): got %v; want %v", i, got, exp)
		}
		if got, exp := p.Float(4), -5.25+float64(i); got != exp {
			t.Errorf("%d:Float(4): got %v; want %v", i, got, exp)
		}
		if got := p.Runes(5); got == nil {
			if p.String(5) != "" {
				t.Errorf("%d:Runes(5): expected non-empty list", i)
			}
		} else if got[0] != start || got[len(got)-1] != end {
			t.Errorf("%d:Runes(5): got %#x; want %#x..%#x", i, got, start, end)
		}
		// Where a record has a trailing comment, the fixture makes it repeat
		// the text of field 1.
		if got := p.Comment(); got != "" && got != p.String(1) {
			t.Errorf("%d:Comment(): got %v; want %v", i, got, p.String(1))
		}
	}
	if err := p.Err(); err != nil {
		t.Errorf("Parser error: %v", err)
	}
	// A parser that stops early must not pass silently.
	if i != len(want) {
		t.Errorf("parsed %d records; want %d", i, len(want))
	}
	if len(parts) != 0 {
		t.Errorf("expected %d more invocations of part handler", len(parts))
	}
}