Update go dependencies

This commit is contained in:
Manuel de Brito Fontes 2018-05-26 11:27:53 -04:00 committed by Manuel Alejandro de Brito Fontes
parent 15ffb51394
commit bb4d483837
No known key found for this signature in database
GPG key ID: 786136016A8BA02A
1621 changed files with 86368 additions and 284392 deletions

14
vendor/golang.org/x/text/README.md generated vendored
View file

@ -9,16 +9,18 @@ This repo uses Semantic versioning (http://semver.org/), so
and
1. PATCH version when you make backwards-compatible bug fixes.
A Unicode major and minor version bump is mapped to a major version bump in
x/text.
A patch version bump in Unicode is mapped to a minor version bump in x/text.
Note that, consistent with the definitions in semver, until version 1.0.0 of
x/text is reached, the minor version is considered a major version.
So going from 0.1.0 to 0.2.0 is considered to be a major version bump.
Until version 1.0.0 of x/text is reached, the minor version is considered a
major version. So going from 0.1.0 to 0.2.0 is considered to be a major version
bump.
A major new CLDR version is mapped to a minor version increase in x/text.
Any other new CLDR version is mapped to a patch version increase in x/text.
It is important that the Unicode version used in `x/text` matches the one used
by your Go compiler. The `x/text` repository supports multiple versions of
Unicode and will match the version of Unicode to that of the Go compiler. At the
moment this is supported for Go compilers from version 1.7.
## Download/Install
The easiest way to install is to run `go get -u golang.org/x/text`. You can

View file

@ -1,290 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import "testing"
// cjk returns the implicit DUCET collation elements for a CJK rune.
// In the DUCET, a CJK character C is represented as the pair
// [.AAAA.0020.0002.C][.BBBB.0000.0000.C], where AAAA is the most
// significant 15 bits of C plus a base value and BBBB is the low 15
// bits with bit 15 set. Any base value works for the tests; FB40 is
// the common one.
func cjk(r rune) []rawCE {
	const implicitBase = 0xFB40
	hi := implicitBase + int(r>>15)
	lo := int(r&0x7FFF) | 0x8000
	return []rawCE{
		{w: []int{hi, defaultSecondary, defaultTertiary, int(r)}},
		{w: []int{lo, 0, 0, int(r)}},
	}
}
// pCE returns a single collation element with primary weight p and
// default secondary/tertiary weights.
func pCE(p int) []rawCE {
return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0)
}
// pqCE returns an element with primary weight p and quaternary weight q.
func pqCE(p, q int) []rawCE {
return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0)
}
// ptCE returns an element with primary weight p and tertiary weight t.
func ptCE(p, t int) []rawCE {
return mkCE([]int{p, defaultSecondary, t, 0}, 0)
}
// ptcCE is ptCE with an explicit canonical combining class.
func ptcCE(p, t int, ccc uint8) []rawCE {
return mkCE([]int{p, defaultSecondary, t, 0}, ccc)
}
// sCE returns a secondary-only element (zero primary weight).
func sCE(s int) []rawCE {
return mkCE([]int{0, s, defaultTertiary, 0}, 0)
}
// stCE returns an element with secondary weight s and tertiary weight t.
func stCE(s, t int) []rawCE {
return mkCE([]int{0, s, t, 0}, 0)
}
// scCE is sCE with an explicit canonical combining class.
func scCE(s int, ccc uint8) []rawCE {
return mkCE([]int{0, s, defaultTertiary, 0}, ccc)
}
// mkCE wraps the weight quadruple w and combining class ccc into a
// one-element rawCE slice, the common currency of the tests above.
func mkCE(w []int, ccc uint8) []rawCE {
return []rawCE{rawCE{w, ccc}}
}
// ducetElem is used to define test data that is used to generate a table.
type ducetElem struct {
str string // input string
ces []rawCE // collation elements associated with str
}
// newBuilder returns a Builder populated with the given DUCET test
// entries. Failures to Add are reported as test errors. The table is
// reset to an empty one and the root trie is sorted so the result is
// ready for the processing steps exercised by the tests.
func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
	b := NewBuilder()
	for _, e := range ducet {
		ces := [][]int{}
		for _, ce := range e.ces {
			ces = append(ces, ce.w)
		}
		if err := b.Add([]rune(e.str), ces, nil); err != nil {
			// t.Error, not t.Errorf(err.Error()): passing an error's text
			// as a format string trips go vet and misformats any '%'.
			t.Error(err)
		}
	}
	b.t = &table{}
	b.root.sort()
	return b
}
// convertTest pairs input elements with the expected output of
// convertLargeWeights; err marks cases that must fail.
type convertTest struct {
in, out []rawCE
err bool
}
// convLargeTests exercises conversion of large (implicit CJK) weights;
// 0xFB40 is the implicit base used by cjk above.
var convLargeTests = []convertTest{
{pCE(0xFB39), pCE(0xFB39), false},
{cjk(0x2F9B2), pqCE(0x3F9B2, 0x2F9B2), false},
{pCE(0xFB40), pCE(0), true},
{append(pCE(0xFB40), pCE(0)[0]), pCE(0), true},
{pCE(0xFFFE), pCE(illegalOffset), false},
{pCE(0xFFFF), pCE(illegalOffset + 1), false},
}
// TestConvertLarge checks convertLargeWeights against convLargeTests:
// error cases must return an error, others must produce the expected
// element array.
func TestConvertLarge(t *testing.T) {
for i, tt := range convLargeTests {
e := new(entry)
for _, ce := range tt.in {
e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc))
}
elems, err := convertLargeWeights(e.elems)
if tt.err {
if err == nil {
t.Errorf("%d: expected error; none found", i)
}
continue
} else if err != nil {
t.Errorf("%d: unexpected error: %v", i, err)
}
if !equalCEArrays(elems, tt.out) {
t.Errorf("%d: conversion was %x; want %x", i, elems, tt.out)
}
}
}
// Collation element table for simplify tests.
// Entries marked "can be removed" / "eliminated" are expected to be
// dropped or flagged by simplify (see TestSimplify).
var simplifyTest = []ducetElem{
{"\u0300", sCE(30)}, // grave
{"\u030C", sCE(40)}, // caron
{"A", ptCE(100, 8)},
{"D", ptCE(104, 8)},
{"E", ptCE(105, 8)},
{"I", ptCE(110, 8)},
{"z", ptCE(130, 8)},
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0])},
{"\u05B7", sCE(80)},
{"\u00C0", append(ptCE(100, 8), sCE(30)...)}, // A with grave, can be removed
{"\u00C8", append(ptCE(105, 8), sCE(30)...)}, // E with grave
{"\uFB1F", append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])}, // eliminated by NFD
{"\u00C8\u0302", ptCE(106, 8)}, // block previous from simplifying
{"\u01C5", append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])}, // eliminated by NFKD
// no removal: tertiary value of third element is not maxTertiary
{"\u2162", append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])},
}
// genColTests lists input strings and the collation elements that
// genColElems is expected to generate for them (see TestGenColElems).
var genColTests = []ducetElem{
{"\uFA70", pqCE(0x1FA70, 0xFA70)},
{"A\u0300", append(ptCE(100, 8), sCE(30)...)},
{"A\u0300\uFA70", append(ptCE(100, 8), sCE(30)[0], pqCE(0x1FA70, 0xFA70)[0])},
{"A\u0300A\u0300", append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])},
}
// TestGenColElems builds a table from the first five simplifyTest
// entries and checks genColElems output against genColTests.
func TestGenColElems(t *testing.T) {
b := newBuilder(t, simplifyTest[:5])
for i, tt := range genColTests {
res := b.root.genColElems(tt.str)
if !equalCEArrays(tt.ces, res) {
t.Errorf("%d: result %X; want %X", i, res, tt.ces)
}
}
}
// strArray is a list of strings supporting a linear membership test.
type strArray []string

// contains reports whether s is an element of sa.
func (sa strArray) contains(s string) bool {
	for i := 0; i < len(sa); i++ {
		if sa[i] == s {
			return true
		}
	}
	return false
}
// simplifyRemoved lists entries simplify is expected to delete;
// simplifyMarked lists entries it must flag for decomposition.
var simplifyRemoved = strArray{"\u00C0", "\uFB1F"}
var simplifyMarked = strArray{"\u01C5"}
// TestSimplify runs simplify over simplifyTest and verifies that
// surviving entries keep their elements, removed entries are gone,
// and exactly the simplifyMarked entries carry the decompose flag.
func TestSimplify(t *testing.T) {
b := newBuilder(t, simplifyTest)
o := &b.root
simplify(o)
// Surviving entries must be findable with unchanged elements.
for i, tt := range simplifyTest {
if simplifyRemoved.contains(tt.str) {
continue
}
e := o.find(tt.str)
if e.str != tt.str || !equalCEArrays(e.elems, tt.ces) {
t.Errorf("%d: found element %s -> %X; want %s -> %X", i, e.str, e.elems, tt.str, tt.ces)
break
}
}
// Walk all indexed entries and count those marked for decomposition.
var i, k int
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
gold := simplifyMarked.contains(e.str)
if gold {
k++
}
if gold != e.decompose {
t.Errorf("%d: %s has decompose %v; want %v", i, e.str, e.decompose, gold)
}
i++
}
if k != len(simplifyMarked) {
t.Errorf(" an entry that should be marked as decompose was deleted")
}
}
// expandTest lists entries with multi-element expansions; \u00C8 and
// \u00C9 share an identical expansion so uniquing can be verified.
var expandTest = []ducetElem{
{"\u0300", append(scCE(29, 230), scCE(30, 230)...)},
{"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)},
{"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)},
{"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
{"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
}
// TestExpand checks processExpansions: each entry's expansion block
// starts with its length followed by the packed elements, and the
// total block size reflects uniquing of identical expansions.
func TestExpand(t *testing.T) {
const (
totalExpansions = 5
totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions
)
b := newBuilder(t, expandTest)
o := &b.root
b.processExpansions(o)
e := o.front()
for _, tt := range expandTest {
exp := b.t.ExpandElem[e.expansionIndex:]
// exp[0] holds the number of elements in this expansion.
if int(exp[0]) != len(tt.ces) {
t.Errorf("%U: len(expansion)==%d; want %d", []rune(tt.str)[0], exp[0], len(tt.ces))
}
exp = exp[1:]
for j, w := range tt.ces {
if ce, _ := makeCE(w); exp[j] != ce {
t.Errorf("%U: element %d is %X; want %X", []rune(tt.str)[0], j, exp[j], ce)
}
}
e, _ = e.nextIndexed()
}
// Verify uniquing.
if len(b.t.ExpandElem) != totalElements {
t.Errorf("len(expandElem)==%d; want %d", len(b.t.ExpandElem), totalElements)
}
}
// contractTest defines contraction strings in three groups (a*, b*,
// A*); a* and A* share suffixes, which processContractions should
// collapse into a single trie (see TestContract).
var contractTest = []ducetElem{
{"abc", pCE(102)},
{"abd", pCE(103)},
{"a", pCE(100)},
{"ab", pCE(101)},
{"ac", pCE(104)},
{"bcd", pCE(202)},
{"b", pCE(200)},
{"bc", pCE(201)},
{"bd", pCE(203)},
// shares suffixes with a*
{"Ab", pCE(301)},
{"A", pCE(300)},
{"Ac", pCE(304)},
{"Abc", pCE(302)},
{"Abd", pCE(303)},
// starter to be ignored
{"z", pCE(1000)},
}
// TestContract verifies processContractions: suffix tries are uniqued
// (a* and A* share one), every contraction string resolves via lookup
// to its expected element, and the contraction element block has the
// expected total size.
func TestContract(t *testing.T) {
	const (
		totalElements = 5 + 5 + 4
	)
	b := newBuilder(t, contractTest)
	o := &b.root
	b.processContractions(o)
	indexMap := make(map[int]bool)
	handleMap := make(map[rune]*entry)
	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
		if e.contractionHandle.n > 0 {
			handleMap[e.runes[0]] = e
			indexMap[e.contractionHandle.index] = true
		}
	}
	// Verify uniquing: the a* and A* groups share suffixes, so only two
	// distinct trie indexes should remain.
	if len(indexMap) != 2 {
		t.Errorf("number of tries is %d; want %d", len(indexMap), 2)
	}
	for _, tt := range contractTest {
		e, ok := handleMap[[]rune(tt.str)[0]]
		if !ok {
			continue
		}
		str := tt.str[1:]
		offset, n := lookup(&b.t.ContractTries, e.contractionHandle, []byte(str))
		if len(str) != n {
			t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str))
		}
		ce := b.t.ContractElem[offset+e.contractionIndex]
		if want, _ := makeCE(tt.ces[0]); want != ce {
			t.Errorf("%s: element %X; want %X", tt.str, ce, want)
		}
	}
	if len(b.t.ContractElem) != totalElements {
		// Message fixed: this checks ContractElem, not ExpandElem
		// (the original said "len(expandElem)" — a copy-paste slip).
		t.Errorf("len(contractElem)==%d; want %d", len(b.t.ContractElem), totalElements)
	}
}

View file

@ -1,215 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"testing"
"golang.org/x/text/internal/colltab"
)
// ceTest maps constructor f applied to arg to the expected packed
// element val. A val of 0xFFFF marks a case where f must fail (see
// TestColElem).
type ceTest struct {
f func(in []int) (uint32, error)
arg []int
val uint32
}
// normalCE packs a regular element from [primary, secondary, tertiary, ccc].
func normalCE(in []int) (ce uint32, err error) {
return makeCE(rawCE{w: in[:3], ccc: uint8(in[3])})
}
// expandCE packs an expansion-index element from in[0].
func expandCE(in []int) (ce uint32, err error) {
return makeExpandIndex(in[0])
}
// contractCE packs a contraction element from a {index, n} handle and offset.
func contractCE(in []int) (ce uint32, err error) {
return makeContractIndex(ctHandle{in[0], in[1]}, in[2])
}
// decompCE packs a decomposition element from in[0] and in[1].
func decompCE(in []int) (ce uint32, err error) {
return makeDecompose(in[0], in[1])
}
// ceTests covers valid packings and out-of-range inputs for each
// element constructor; val == 0xFFFF means an error is expected.
var ceTests = []ceTest{
{normalCE, []int{0, 0, 0, 0}, 0xA0000000},
{normalCE, []int{0, 0x28, 3, 0}, 0xA0002803},
{normalCE, []int{0, 0x28, 3, 0xFF}, 0xAFF02803},
{normalCE, []int{100, defaultSecondary, 3, 0}, 0x0000C883},
// non-ignorable primary with non-default secondary
{normalCE, []int{100, 0x28, defaultTertiary, 0}, 0x4000C828},
{normalCE, []int{100, defaultSecondary + 8, 3, 0}, 0x0000C983},
{normalCE, []int{100, 0, 3, 0}, 0xFFFF}, // non-ignorable primary with non-supported secondary
{normalCE, []int{100, 1, 3, 0}, 0xFFFF},
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0, 0}, 0xFFFF},
{normalCE, []int{0, 1 << maxSecondaryBits, 0, 0}, 0xFFFF},
{normalCE, []int{100, defaultSecondary, 1 << maxTertiaryBits, 0}, 0xFFFF},
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}, 0x88FF0123},
{normalCE, []int{0x123, defaultSecondary + 1, 8, 0xFF}, 0xFFFF},
{contractCE, []int{0, 0, 0}, 0xC0000000},
{contractCE, []int{1, 1, 1}, 0xC0010011},
{contractCE, []int{1, (1 << maxNBits) - 1, 1}, 0xC001001F},
{contractCE, []int{(1 << maxTrieIndexBits) - 1, 1, 1}, 0xC001FFF1},
{contractCE, []int{1, 1, (1 << maxContractOffsetBits) - 1}, 0xDFFF0011},
{contractCE, []int{1, (1 << maxNBits), 1}, 0xFFFF},
{contractCE, []int{(1 << maxTrieIndexBits), 1, 1}, 0xFFFF},
{contractCE, []int{1, (1 << maxContractOffsetBits), 1}, 0xFFFF},
{expandCE, []int{0}, 0xE0000000},
{expandCE, []int{5}, 0xE0000005},
{expandCE, []int{(1 << maxExpandIndexBits) - 1}, 0xE000FFFF},
{expandCE, []int{1 << maxExpandIndexBits}, 0xFFFF},
{decompCE, []int{0, 0}, 0xF0000000},
{decompCE, []int{1, 1}, 0xF0000101},
{decompCE, []int{0x1F, 0x1F}, 0xF0001F1F},
{decompCE, []int{256, 0x1F}, 0xFFFF},
{decompCE, []int{0x1F, 256}, 0xFFFF},
}
// TestColElem runs every ceTests constructor and compares the packed
// element with the expected value; a val of 0xFFFF means the
// constructor must return an error.
func TestColElem(t *testing.T) {
	for i, tt := range ceTests {
		// Work on a copy so a constructor that mutates its argument
		// cannot corrupt the shared test table.
		in := make([]int, len(tt.arg))
		copy(in, tt.arg)
		ce, err := tt.f(in)
		if tt.val == 0xFFFF {
			if err == nil {
				t.Errorf("%d: expected error for args %x", i, tt.arg)
			}
			continue
		}
		if err != nil {
			// %v formats the error directly; the .Error() call was redundant.
			t.Errorf("%d: unexpected error: %v", i, err)
		}
		if ce != tt.val {
			t.Errorf("%d: colElem=%X; want %X", i, ce, tt.val)
		}
	}
}
// mkRawCES wraps each weight quadruple in a rawCE with zero ccc.
func mkRawCES(in [][]int) []rawCE {
out := []rawCE{}
for _, w := range in {
out = append(out, rawCE{w: w})
}
return out
}
// weightsTest pairs two element sequences with the expected comparison
// level and (for compareWeights) result.
type weightsTest struct {
a, b [][]int
level colltab.Level
result int
}
// nextWeightTests: b is the expected successor of a when incrementing
// at the given level (see TestNextWeight).
var nextWeightTests = []weightsTest{
{
a: [][]int{{100, 20, 5, 0}},
b: [][]int{{101, defaultSecondary, defaultTertiary, 0}},
level: colltab.Primary,
},
{
a: [][]int{{100, 20, 5, 0}},
b: [][]int{{100, 21, defaultTertiary, 0}},
level: colltab.Secondary,
},
{
a: [][]int{{100, 20, 5, 0}},
b: [][]int{{100, 20, 6, 0}},
level: colltab.Tertiary,
},
{
a: [][]int{{100, 20, 5, 0}},
b: [][]int{{100, 20, 5, 0}},
level: colltab.Identity,
},
}
// extra holds trailing elements appended per level in TestNextWeight;
// extra[l] is significant only at levels <= l.
var extra = [][]int{{200, 32, 8, 0}, {0, 32, 8, 0}, {0, 0, 8, 0}, {0, 0, 0, 0}}
// TestNextWeight checks nextWeight for each base case and then with a
// trailing extra element: the tail is dropped from the expectation
// when the increment happens at or before the tail's level.
func TestNextWeight(t *testing.T) {
for i, tt := range nextWeightTests {
test := func(l colltab.Level, tt weightsTest, a, gold [][]int) {
res := nextWeight(tt.level, mkRawCES(a))
if !equalCEArrays(mkRawCES(gold), res) {
t.Errorf("%d:%d: expected weights %d; found %d", i, l, gold, res)
}
}
test(-1, tt, tt.a, tt.b)
for l := colltab.Primary; l <= colltab.Tertiary; l++ {
if tt.level <= l {
test(l, tt, append(tt.a, extra[l]), tt.b)
} else {
test(l, tt, append(tt.a, extra[l]), append(tt.b, extra[l]))
}
}
}
}
// compareTests: expected compareWeights result (-1/0/1) and the level
// at which a and b first differ (see TestCompareWeights).
var compareTests = []weightsTest{
{
[][]int{{100, 20, 5, 0}},
[][]int{{100, 20, 5, 0}},
colltab.Identity,
0,
},
{
[][]int{{100, 20, 5, 0}, extra[0]},
[][]int{{100, 20, 5, 1}},
colltab.Primary,
1,
},
{
[][]int{{100, 20, 5, 0}},
[][]int{{101, 20, 5, 0}},
colltab.Primary,
-1,
},
{
[][]int{{101, 20, 5, 0}},
[][]int{{100, 20, 5, 0}},
colltab.Primary,
1,
},
{
[][]int{{100, 0, 0, 0}, {0, 20, 5, 0}},
[][]int{{0, 20, 5, 0}, {100, 0, 0, 0}},
colltab.Identity,
0,
},
{
[][]int{{100, 20, 5, 0}},
[][]int{{100, 21, 5, 0}},
colltab.Secondary,
-1,
},
{
[][]int{{100, 20, 5, 0}},
[][]int{{100, 20, 2, 0}},
colltab.Tertiary,
1,
},
{
[][]int{{100, 20, 5, 1}},
[][]int{{100, 20, 5, 2}},
colltab.Quaternary,
-1,
},
}
// TestCompareWeights checks compareWeights on each case, then again
// with an identical element appended to both sides (which must not
// change the outcome).
func TestCompareWeights(t *testing.T) {
for i, tt := range compareTests {
test := func(tt weightsTest, a, b [][]int) {
res, level := compareWeights(mkRawCES(a), mkRawCES(b))
if res != tt.result {
t.Errorf("%d: expected comparison result %d; found %d", i, tt.result, res)
}
if level != tt.level {
t.Errorf("%d: expected level %d; found %d", i, tt.level, level)
}
}
test(tt, tt.a, tt.b)
test(tt, append(tt.a, extra[0]), append(tt.b, extra[0]))
}
}

View file

@ -1,266 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"bytes"
"sort"
"testing"
"golang.org/x/text/internal/colltab"
)
// largetosmall lists prefixes of "abcdef" with indexes giving their
// expected position after sorting (shared by several sort tests).
var largetosmall = []stridx{
{"a", 5},
{"ab", 4},
{"abc", 3},
{"abcd", 2},
{"abcde", 1},
{"abcdef", 0},
}
// offsetSortTests: each case sorts to the order encoded in the index
// fields (see TestOffsetSort).
var offsetSortTests = [][]stridx{
{
{"bcde", 1},
{"bc", 5},
{"ab", 4},
{"bcd", 3},
{"abcd", 0},
{"abc", 2},
},
largetosmall,
}
// TestOffsetSort checks that offsetSort orders entries to match their
// encoded index, and that the genStateTests inputs sort to indexes
// 1..n (they must be well-formed for later tests).
func TestOffsetSort(t *testing.T) {
for i, st := range offsetSortTests {
sort.Sort(offsetSort(st))
for j, si := range st {
if j != si.index {
t.Errorf("%d: failed: %v", i, st)
}
}
}
for i, tt := range genStateTests {
// ensure input is well-formed
sort.Sort(offsetSort(tt.in))
for j, si := range tt.in {
if si.index != j+1 {
t.Errorf("%dth sort failed: %v", i, tt.in)
}
}
}
}
// genidxtest1 encodes, in its index fields, the expected order under
// genidxSort (see TestGenIdxSort).
var genidxtest1 = []stridx{
{"bcde", 3},
{"bc", 6},
{"ab", 2},
{"bcd", 5},
{"abcd", 0},
{"abc", 1},
{"bcdf", 4},
}
// genidxSortTests lists the inputs exercised by TestGenIdxSort.
var genidxSortTests = [][]stridx{
genidxtest1,
largetosmall,
}
// TestGenIdxSort checks that genidxSort orders each test slice to
// match the index encoded in every entry.
func TestGenIdxSort(t *testing.T) {
for i, st := range genidxSortTests {
sort.Sort(genidxSort(st))
for j, si := range st {
if j != si.index {
t.Errorf("%dth sort failed %v", i, st)
break
}
}
}
}
// entrySortTests: the last field (I) of each trie entry encodes its
// expected position after entrySort (see TestEntrySort).
var entrySortTests = []colltab.ContractTrieSet{
{
{10, 0, 1, 3},
{99, 0, 1, 0},
{20, 50, 0, 2},
{30, 0, 1, 1},
},
}
// TestEntrySort checks that entrySort orders trie entries to match
// the position encoded in their I field.
func TestEntrySort(t *testing.T) {
for i, et := range entrySortTests {
sort.Sort(entrySort(et))
for j, fe := range et {
if j != int(fe.I) {
t.Errorf("%dth sort failed %v", i, et)
break
}
}
}
}
// GenStateTest pairs contraction input strings with the expected
// first-block length and the trie that genStates should build.
type GenStateTest struct {
in []stridx
firstBlockLen int
out colltab.ContractTrieSet
}
// genStateTests covers single strings, sibling-merged ranges, nested
// prefixes, and suffix sharing (see TestGenStates / TestLookupContraction).
var genStateTests = []GenStateTest{
{[]stridx{
{"abc", 1},
},
1,
colltab.ContractTrieSet{
{'a', 0, 1, noIndex},
{'b', 0, 1, noIndex},
{'c', 'c', final, 1},
},
},
{[]stridx{
{"abc", 1},
{"abd", 2},
{"abe", 3},
},
1,
colltab.ContractTrieSet{
{'a', 0, 1, noIndex},
{'b', 0, 1, noIndex},
{'c', 'e', final, 1},
},
},
{[]stridx{
{"abc", 1},
{"ab", 2},
{"a", 3},
},
1,
colltab.ContractTrieSet{
{'a', 0, 1, 3},
{'b', 0, 1, 2},
{'c', 'c', final, 1},
},
},
{[]stridx{
{"abc", 1},
{"abd", 2},
{"ab", 3},
{"ac", 4},
{"a", 5},
{"b", 6},
},
2,
colltab.ContractTrieSet{
{'b', 'b', final, 6},
{'a', 0, 2, 5},
{'c', 'c', final, 4},
{'b', 0, 1, 3},
{'c', 'd', final, 1},
},
},
{[]stridx{
{"bcde", 2},
{"bc", 7},
{"ab", 6},
{"bcd", 5},
{"abcd", 1},
{"abc", 4},
{"bcdf", 3},
},
2,
colltab.ContractTrieSet{
{'b', 3, 1, noIndex},
{'a', 0, 1, noIndex},
{'b', 0, 1, 6},
{'c', 0, 1, 4},
{'d', 'd', final, 1},
{'c', 0, 1, 7},
{'d', 0, 1, 5},
{'e', 'f', final, 2},
},
},
}
// TestGenStates builds a trie from each genStateTests input and
// compares the first-block length and every entry field against the
// expected set.
func TestGenStates(t *testing.T) {
for i, tt := range genStateTests {
si := []stridx{}
for _, e := range tt.in {
si = append(si, e)
}
// ensure input is well-formed
sort.Sort(genidxSort(si))
ct := colltab.ContractTrieSet{}
n, _ := genStates(&ct, si)
if nn := tt.firstBlockLen; nn != n {
t.Errorf("%d: block len %v; want %v", i, n, nn)
}
if lv, lw := len(ct), len(tt.out); lv != lw {
t.Errorf("%d: len %v; want %v", i, lv, lw)
continue
}
for j, fe := range tt.out {
const msg = "%d:%d: value %s=%v; want %v"
if fe.L != ct[j].L {
t.Errorf(msg, i, j, "l", ct[j].L, fe.L)
}
if fe.H != ct[j].H {
t.Errorf(msg, i, j, "h", ct[j].H, fe.H)
}
if fe.N != ct[j].N {
t.Errorf(msg, i, j, "n", ct[j].N, fe.N)
}
if fe.I != ct[j].I {
t.Errorf(msg, i, j, "i", ct[j].I, fe.I)
}
}
}
}
// TestLookupContraction builds tries via appendTrie and checks that
// lookup finds each input string — and the same string with a byte
// appended — returning its index and consuming exactly its bytes.
func TestLookupContraction(t *testing.T) {
for i, tt := range genStateTests {
input := []string{}
for _, e := range tt.in {
input = append(input, e.str)
}
cts := colltab.ContractTrieSet{}
h, _ := appendTrie(&cts, input)
for j, si := range tt.in {
str := si.str
for _, s := range []string{str, str + "X"} {
msg := "%d:%d: %s(%s) %v; want %v"
idx, sn := lookup(&cts, h, []byte(s))
if idx != si.index {
t.Errorf(msg, i, j, "index", s, idx, si.index)
}
if sn != len(str) {
t.Errorf(msg, i, j, "sn", s, sn, len(str))
}
}
}
}
}
// TestPrintContractionTrieSet checks that print renders the last
// genStateTests trie exactly as the golden contractTrieOutput string.
func TestPrintContractionTrieSet(t *testing.T) {
testdata := colltab.ContractTrieSet(genStateTests[4].out)
buf := &bytes.Buffer{}
print(&testdata, buf, "test")
if contractTrieOutput != buf.String() {
t.Errorf("output differs; found\n%s", buf.String())
println(string(buf.Bytes()))
}
}
// contractTrieOutput is the golden output for
// TestPrintContractionTrieSet; it must match print's output byte for byte.
const contractTrieOutput = `// testCTEntries: 8 entries, 32 bytes
var testCTEntries = [8]struct{L,H,N,I uint8}{
{0x62, 0x3, 1, 255},
{0x61, 0x0, 1, 255},
{0x62, 0x0, 1, 6},
{0x63, 0x0, 1, 4},
{0x64, 0x64, 0, 1},
{0x63, 0x0, 1, 7},
{0x64, 0x0, 1, 5},
{0x65, 0x66, 0, 2},
}
var testContractTrieSet = colltab.ContractTrieSet( testCTEntries[:] )
`

View file

@ -1,229 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"strconv"
"testing"
"golang.org/x/text/internal/colltab"
)
// entryTest maps constructor f applied to arg to an expected packed
// value. NOTE(review): this type appears unused in the visible chunk —
// confirm before removing.
type entryTest struct {
f func(in []int) (uint32, error)
arg []int
val uint32
}
// makeList returns a list of entries of length n+2, with n normal
// entries plus a leading and trailing anchor. Entries are doubly
// linked, carry strictly increasing primary weights, and both anchors
// are excluded from indexing.
func makeList(n int) []*entry {
es := make([]*entry, n+2)
weights := []rawCE{{w: []int{100, 20, 5, 0}}}
for i := range es {
runes := []rune{rune(i)}
es[i] = &entry{
runes: runes,
elems: weights,
}
// Advance the primary weight so entries are strictly ordered.
weights = nextWeight(colltab.Primary, weights)
}
for i := 1; i < len(es); i++ {
es[i-1].next = es[i]
es[i].prev = es[i-1]
_, es[i-1].level = compareWeights(es[i-1].elems, es[i].elems)
}
es[0].exclude = true
es[0].logical = firstAnchor
es[len(es)-1].exclude = true
es[len(es)-1].logical = lastAnchor
return es
}
// TestNextIndexed toggles every exclusion pattern over a 5-entry list
// (encoded as a bit mask) and checks that nextIndexed visits exactly
// the non-excluded entries, ending in nil.
func TestNextIndexed(t *testing.T) {
const n = 5
es := makeList(n)
for i := int64(0); i < 1<<n; i++ {
// The leading 1 bit pads the mask to a fixed width of n+1 digits.
mask := strconv.FormatInt(i+(1<<n), 2)
for i, c := range mask {
es[i].exclude = c == '1'
}
e := es[0]
for i, c := range mask {
if c == '0' {
e, _ = e.nextIndexed()
if e != es[i] {
t.Errorf("%d: expected entry %d; found %d", i, es[i].elems, e.elems)
}
}
}
if e, _ = e.nextIndexed(); e != nil {
t.Errorf("%d: expected nil entry; found %d", i, e.elems)
}
}
}
// TestRemove removes every subset of a 5-entry list (encoded as a bit
// mask; '0' bits are removed) and checks that traversal visits exactly
// the surviving entries, ending in nil.
func TestRemove(t *testing.T) {
const n = 5
for i := int64(0); i < 1<<n; i++ {
es := makeList(n)
mask := strconv.FormatInt(i+(1<<n), 2)
for i, c := range mask {
if c == '0' {
es[i].remove()
}
}
e := es[0]
for i, c := range mask {
if c == '1' {
if e != es[i] {
t.Errorf("%d: expected entry %d; found %d", i, es[i].elems, e.elems)
}
e, _ = e.nextIndexed()
}
}
if e != nil {
t.Errorf("%d: expected nil entry; found %d", i, e.elems)
}
}
}
// nextPerm advances a in place to its next permutation in
// lexicographic order. The starting permutation is assumed to be a
// list of integers sorted in increasing order. It returns false when
// a already holds the final (descending) permutation.
func nextPerm(a []int) bool {
	// Find the rightmost position whose value is less than its successor.
	pivot := -1
	for j := len(a) - 2; j >= 0; j-- {
		if a[j] < a[j+1] {
			pivot = j
			break
		}
	}
	if pivot < 0 {
		// Entirely descending: no permutations remain.
		return false
	}
	// Swap the pivot with the rightmost element exceeding it.
	for j := len(a) - 1; j > pivot; j-- {
		if a[j] > a[pivot] {
			a[pivot], a[j] = a[j], a[pivot]
			break
		}
	}
	// Reverse the suffix after the pivot back into ascending order.
	for lo, hi := pivot+1, len(a)-1; lo < hi; lo, hi = lo+1, hi-1 {
		a[lo], a[hi] = a[hi], a[lo]
	}
	return true
}
// TestInsertAfter re-links a fresh list in every permutation order
// using insertAfter and verifies traversal yields that order. Elements
// are given equal weights so ordering comes from the links alone.
func TestInsertAfter(t *testing.T) {
const n = 5
orig := makeList(n)
perm := make([]int, n)
for i := range perm {
perm[i] = i + 1
}
for ok := true; ok; ok = nextPerm(perm) {
es := makeList(n)
last := es[0]
for _, i := range perm {
last.insertAfter(es[i])
last = es[i]
}
// Equalize weights so only link order distinguishes entries.
for _, e := range es {
e.elems = es[0].elems
}
e := es[0]
for _, i := range perm {
e, _ = e.nextIndexed()
if e.runes[0] != orig[i].runes[0] {
t.Errorf("%d:%d: expected entry %X; found %X", perm, i, orig[i].runes, e.runes)
break
}
}
}
}
// TestInsertBefore mirrors TestInsertAfter: entries are inserted
// before the tail in permutation order, so traversal must yield the
// permutation reversed.
func TestInsertBefore(t *testing.T) {
const n = 5
orig := makeList(n)
perm := make([]int, n)
for i := range perm {
perm[i] = i + 1
}
for ok := true; ok; ok = nextPerm(perm) {
es := makeList(n)
last := es[len(es)-1]
for _, i := range perm {
last.insertBefore(es[i])
last = es[i]
}
// Equalize weights so only link order distinguishes entries.
for _, e := range es {
e.elems = es[0].elems
}
e := es[0]
for i := n - 1; i >= 0; i-- {
e, _ = e.nextIndexed()
if e.runes[0] != rune(perm[i]) {
t.Errorf("%d:%d: expected entry %X; found %X", perm, i, orig[i].runes, e.runes)
break
}
}
}
}
// entryLessTest pairs two entries with the expected entryLess(a, b)
// result.
type entryLessTest struct {
a, b *entry
res bool
}
// w1 and w2 differ only in the primary weight (100 vs 101).
var (
w1 = []rawCE{{w: []int{100, 20, 5, 5}}}
w2 = []rawCE{{w: []int{101, 20, 5, 5}}}
)
// entryLessTests covers weight order, string tie-breaking, and the
// special handling of first/last anchors.
var entryLessTests = []entryLessTest{
{&entry{str: "a", elems: w1},
&entry{str: "a", elems: w1},
false,
},
{&entry{str: "a", elems: w1},
&entry{str: "a", elems: w2},
true,
},
{&entry{str: "a", elems: w1},
&entry{str: "b", elems: w1},
true,
},
{&entry{str: "a", elems: w2},
&entry{str: "a", elems: w1},
false,
},
{&entry{str: "c", elems: w1},
&entry{str: "b", elems: w1},
false,
},
{&entry{str: "a", elems: w1, logical: firstAnchor},
&entry{str: "a", elems: w1},
true,
},
{&entry{str: "a", elems: w1},
&entry{str: "b", elems: w1, logical: firstAnchor},
false,
},
{&entry{str: "b", elems: w1},
&entry{str: "a", elems: w1, logical: lastAnchor},
true,
},
{&entry{str: "a", elems: w1, logical: lastAnchor},
&entry{str: "c", elems: w1},
false,
},
}
// TestEntryLess checks entryLess against every entryLessTests case.
func TestEntryLess(t *testing.T) {
for i, tt := range entryLessTests {
if res := entryLess(tt.a, tt.b); res != tt.res {
t.Errorf("%d: was %v; want %v", i, res, tt.res)
}
}
}

View file

@ -1,107 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package build
import (
"bytes"
"fmt"
"testing"
)
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths. The final row adds five runes that
// land in a single sparse trie block.
var testRunes = []rune{
0x01, 0x0C, 0x7F, // 1-byte sequences
0x80, 0x100, 0x7FF, // 2-byte sequences
0x800, 0x999, 0xFFFF, // 3-byte sequences
0x10000, 0x10101, 0x10FFFF, // 4-byte sequences
0x200, 0x201, 0x202, 0x210, 0x215, // five entries in one sparse block
}
// makeTestTrie builds a trie mapping each rune in testRunes to its
// index and returns the generated table. A generation failure aborts
// the test immediately.
func makeTestTrie(t *testing.T) trie {
	n := newNode()
	for i, r := range testRunes {
		n.insert(r, uint32(i))
	}
	idx := newTrieBuilder()
	idx.addTrie(n)
	tr, err := idx.generate()
	if err != nil {
		// t.Fatal instead of t.Errorf(err.Error()): the latter is a
		// go vet printf violation and, worse, execution continued to
		// dereference tr below, panicking when generate fails.
		t.Fatal(err)
	}
	return *tr
}
// TestGenerateTrie prints the generated test trie (arrays plus struct)
// and compares the result with the golden output constant.
func TestGenerateTrie(t *testing.T) {
testdata := makeTestTrie(t)
buf := &bytes.Buffer{}
testdata.printArrays(buf, "test")
fmt.Fprintf(buf, "var testTrie = ")
testdata.printStruct(buf, &trieHandle{19, 0}, "test")
if output != buf.String() {
t.Error("output differs")
}
}
// output is the golden rendering for TestGenerateTrie; it must match
// printArrays/printStruct output byte for byte.
var output = `// testValues: 832 entries, 3328 bytes
// Block 2 is the null block.
var testValues = [832]uint32 {
// Block 0x0, offset 0x0
0x000c:0x00000001,
// Block 0x1, offset 0x40
0x007f:0x00000002,
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x00c0:0x00000003,
// Block 0x4, offset 0x100
0x0100:0x00000004,
// Block 0x5, offset 0x140
0x0140:0x0000000c, 0x0141:0x0000000d, 0x0142:0x0000000e,
0x0150:0x0000000f,
0x0155:0x00000010,
// Block 0x6, offset 0x180
0x01bf:0x00000005,
// Block 0x7, offset 0x1c0
0x01c0:0x00000006,
// Block 0x8, offset 0x200
0x0219:0x00000007,
// Block 0x9, offset 0x240
0x027f:0x00000008,
// Block 0xa, offset 0x280
0x0280:0x00000009,
// Block 0xb, offset 0x2c0
0x02c1:0x0000000a,
// Block 0xc, offset 0x300
0x033f:0x0000000b,
}
// testLookup: 640 entries, 1280 bytes
// Block 0 is the null block.
var testLookup = [640]uint16 {
// Block 0x0, offset 0x0
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0x0e0:0x05, 0x0e6:0x06,
// Block 0x4, offset 0x100
0x13f:0x07,
// Block 0x5, offset 0x140
0x140:0x08, 0x144:0x09,
// Block 0x6, offset 0x180
0x190:0x03,
// Block 0x7, offset 0x1c0
0x1ff:0x0a,
// Block 0x8, offset 0x200
0x20f:0x05,
// Block 0x9, offset 0x240
0x242:0x01, 0x244:0x02,
0x248:0x03,
0x25f:0x04,
0x260:0x01,
0x26f:0x02,
0x270:0x04, 0x274:0x06,
}
var testTrie = trie{ testLookup[1216:], testValues[0:], testLookup[:], testValues[:]}`

View file

@ -1,482 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"bytes"
"testing"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
)
// weightsTest pairs collator options with input collation elements and
// the elements expected after processing.
type weightsTest struct {
opt opts
in, out ColElems
}
// opts is a compact description of Collator options used to construct
// test collators (see opts.collator).
type opts struct {
lev int // strength level; 0 selects the default (see collator)
alt alternateHandling
top int // variable top
backwards bool
caseLevel bool
}
// ignore returns an initialized boolean array based on the given Level.
// A negative value means using the default setting of quaternary.
func ignore(level colltab.Level) [colltab.NumLevels]bool {
	if level < 0 {
		level = colltab.Quaternary
	}
	var res [colltab.NumLevels]bool
	for i := 0; i < len(res); i++ {
		res[i] = level < colltab.Level(i)
	}
	return res
}
// makeCE packs the weight quadruple w into a colltab.Elem, panicking
// on invalid input (test helper; bad weights are programmer error).
func makeCE(w []int) colltab.Elem {
ce, err := colltab.MakeElem(w[0], w[1], w[2], uint8(w[3]))
if err != nil {
panic(err)
}
return ce
}
// collator builds a Collator from the option shorthand. Note the
// lev-1 shift: lev 0 becomes level -1, which ignore maps to the
// default quaternary strength.
func (o opts) collator() *Collator {
c := &Collator{
options: options{
ignore: ignore(colltab.Level(o.lev - 1)),
alternate: o.alt,
backwards: o.backwards,
caseLevel: o.caseLevel,
variableTop: uint32(o.top),
},
}
return c
}
const (
// maxQ is the maximum quaternary weight (21 bits).
maxQ = 0x1FFFFF
)
// wpq returns weights with primary p, quaternary q, and defaults elsewhere.
func wpq(p, q int) Weights {
return W(p, defaults.Secondary, defaults.Tertiary, q)
}
// wsq returns weights with secondary s, quaternary q, zero primary.
func wsq(s, q int) Weights {
return W(0, s, defaults.Tertiary, q)
}
// wq returns weights that are zero except for quaternary q.
func wq(q int) Weights {
return W(0, 0, 0, q)
}
// zero is the all-zero (fully ignorable) weight value.
var zero = W(0, 0, 0, 0)
// processTests maps input elements to the elements expected from
// processWeights under each alternate-handling mode (shifted,
// shift-trimmed, blanked, non-ignorable) and variable top.
var processTests = []weightsTest{
// Shifted
{ // simple sequence of non-variables
opt: opts{alt: altShifted, top: 100},
in: ColElems{W(200), W(300), W(400)},
out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
},
{ // first is a variable
opt: opts{alt: altShifted, top: 250},
in: ColElems{W(200), W(300), W(400)},
out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
},
{ // all but first are variable
opt: opts{alt: altShifted, top: 999},
in: ColElems{W(1000), W(200), W(300), W(400)},
out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
},
{ // first is a modifier
opt: opts{alt: altShifted, top: 999},
in: ColElems{W(0, 10), W(1000)},
out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
},
{ // primary ignorables
opt: opts{alt: altShifted, top: 250},
in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
},
{ // secondary ignorables
opt: opts{alt: altShifted, top: 250},
in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
},
{ // tertiary ignorables, no change
opt: opts{alt: altShifted, top: 250},
in: ColElems{W(200), zero, W(300), zero, W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
},
// ShiftTrimmed (same as Shifted)
{ // simple sequence of non-variables
opt: opts{alt: altShiftTrimmed, top: 100},
in: ColElems{W(200), W(300), W(400)},
out: ColElems{wpq(200, maxQ), wpq(300, maxQ), wpq(400, maxQ)},
},
{ // first is a variable
opt: opts{alt: altShiftTrimmed, top: 250},
in: ColElems{W(200), W(300), W(400)},
out: ColElems{wq(200), wpq(300, maxQ), wpq(400, maxQ)},
},
{ // all but first are variable
opt: opts{alt: altShiftTrimmed, top: 999},
in: ColElems{W(1000), W(200), W(300), W(400)},
out: ColElems{wpq(1000, maxQ), wq(200), wq(300), wq(400)},
},
{ // first is a modifier
opt: opts{alt: altShiftTrimmed, top: 999},
in: ColElems{W(0, 10), W(1000)},
out: ColElems{wsq(10, maxQ), wpq(1000, maxQ)},
},
{ // primary ignorables
opt: opts{alt: altShiftTrimmed, top: 250},
in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), wsq(15, maxQ), wpq(400, maxQ)},
},
{ // secondary ignorables
opt: opts{alt: altShiftTrimmed, top: 250},
in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), W(0, 0, 15, maxQ), wpq(400, maxQ)},
},
{ // tertiary ignorables, no change
opt: opts{alt: altShiftTrimmed, top: 250},
in: ColElems{W(200), zero, W(300), zero, W(400)},
out: ColElems{wq(200), zero, wpq(300, maxQ), zero, wpq(400, maxQ)},
},
// Blanked
{ // simple sequence of non-variables
opt: opts{alt: altBlanked, top: 100},
in: ColElems{W(200), W(300), W(400)},
out: ColElems{W(200), W(300), W(400)},
},
{ // first is a variable
opt: opts{alt: altBlanked, top: 250},
in: ColElems{W(200), W(300), W(400)},
out: ColElems{zero, W(300), W(400)},
},
{ // all but first are variable
opt: opts{alt: altBlanked, top: 999},
in: ColElems{W(1000), W(200), W(300), W(400)},
out: ColElems{W(1000), zero, zero, zero},
},
{ // first is a modifier
opt: opts{alt: altBlanked, top: 999},
in: ColElems{W(0, 10), W(1000)},
out: ColElems{W(0, 10), W(1000)},
},
{ // primary ignorables
opt: opts{alt: altBlanked, top: 250},
in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
out: ColElems{zero, zero, W(300), W(0, 15), W(400)},
},
{ // secondary ignorables
opt: opts{alt: altBlanked, top: 250},
in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
out: ColElems{zero, zero, W(300), W(0, 0, 15), W(400)},
},
{ // tertiary ignorables, no change
opt: opts{alt: altBlanked, top: 250},
in: ColElems{W(200), zero, W(300), zero, W(400)},
out: ColElems{zero, zero, W(300), zero, W(400)},
},
// Non-ignorable: input is always equal to output.
{ // all but first are variable
opt: opts{alt: altNonIgnorable, top: 999},
in: ColElems{W(1000), W(200), W(300), W(400)},
out: ColElems{W(1000), W(200), W(300), W(400)},
},
{ // primary ignorables
opt: opts{alt: altNonIgnorable, top: 250},
in: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
out: ColElems{W(200), W(0, 10), W(300), W(0, 15), W(400)},
},
{ // secondary ignorables
opt: opts{alt: altNonIgnorable, top: 250},
in: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
out: ColElems{W(200), W(0, 0, 10), W(300), W(0, 0, 15), W(400)},
},
{ // tertiary ignorables, no change
opt: opts{alt: altNonIgnorable, top: 250},
in: ColElems{W(200), zero, W(300), zero, W(400)},
out: ColElems{W(200), zero, W(300), zero, W(400)},
},
}
// TestProcessWeights applies processWeights in place to each
// processTests input and compares the result element by element.
func TestProcessWeights(t *testing.T) {
for i, tt := range processTests {
in := convertFromWeights(tt.in)
out := convertFromWeights(tt.out)
processWeights(tt.opt.alt, uint32(tt.opt.top), in)
for j, w := range in {
if w != out[j] {
t.Errorf("%d: Weights %d was %v; want %v", i, j, w, out[j])
}
}
}
}
// keyFromElemTest maps collator options and input elements to the
// expected sort-key bytes.
type keyFromElemTest struct {
opt opts
in ColElems
out []byte
}
// defS and defT are the default secondary/tertiary weights as bytes.
var defS = byte(defaults.Secondary)
var defT = byte(defaults.Tertiary)
const sep = 0 // separator byte
// keyFromElemTests maps processed collation elements to the sort-key
// bytes keyFromElems should emit under various collator options.
// Within each expected key, levels are separated by sep bytes.
var keyFromElemTests = []keyFromElemTest{
	{ // simple primary and secondary weights.
		opts{alt: altShifted},
		ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
		},
	},
	{ // same as first, but with zero elements that need to be removed
		opts{alt: altShifted},
		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
		},
	},
	{ // same as first, with large primary values
		opts{alt: altShifted},
		ColElems{W(0x200), W(0x8000), W(0, 0x30), W(0x12345)},
		[]byte{0x2, 0, 0x80, 0x80, 0x00, 0x81, 0x23, 0x45, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
		},
	},
	{ // same as first, but with the secondary level backwards
		opts{alt: altShifted, backwards: true},
		ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, 0, defS, 0, 0x30, 0, defS, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
			sep, 0xFF, 0xFF, 0xFF, 0xFF, // quaternary
		},
	},
	{ // same as first, ignoring quaternary level
		opts{alt: altShifted, lev: 3},
		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
		},
	},
	{ // same as first, ignoring tertiary level
		opts{alt: altShifted, lev: 2},
		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
		},
	},
	{ // same as first, ignoring secondary level
		opts{alt: altShifted, lev: 1},
		ColElems{W(0x200), zero, W(0x7FFF), W(0, 0x30), zero, W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00},
	},
	{ // simple primary and secondary weights.
		opts{alt: altShiftTrimmed, top: 0x250},
		ColElems{W(0x300), W(0x200), W(0x7FFF), W(0, 0x30), W(0x800)},
		[]byte{0x3, 0, 0x7F, 0xFF, 0x8, 0x00, // primary
			sep, sep, 0, defS, 0, defS, 0, 0x30, 0, defS, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
			sep, 0xFF, 0x2, 0, // quaternary
		},
	},
	{ // as first, primary with case level enabled
		opts{alt: altShifted, lev: 1, caseLevel: true},
		ColElems{W(0x200), W(0x7FFF), W(0, 0x30), W(0x100)},
		[]byte{0x2, 0, 0x7F, 0xFF, 0x1, 0x00, // primary
			sep, sep, // secondary
			sep, sep, defT, defT, defT, defT, // tertiary
		},
	},
}
// TestKeyFromElems checks that keyFromElems serializes processed
// collation elements into the expected sort-key bytes.
func TestKeyFromElems(t *testing.T) {
	var buf Buffer
	for i, tt := range keyFromElemTests {
		buf.Reset()
		elems := convertFromWeights(tt.in)
		processWeights(tt.opt.alt, uint32(tt.opt.top), elems)
		tt.opt.collator().keyFromElems(&buf, elems)
		got := buf.key
		if len(got) != len(tt.out) {
			t.Errorf("%d: len(ws) was %d; want %d (%X should be %X)", i, len(got), len(tt.out), got, tt.out)
		}
		// Compare only the overlapping prefix so a length mismatch does
		// not additionally cause an out-of-range panic.
		n := len(got)
		if m := len(tt.out); m < n {
			n = m
		}
		for j, c := range got[:n] {
			if c != tt.out[j] {
				t.Errorf("%d: byte %d was %X; want %X", i, j, c, tt.out[j])
			}
		}
	}
}
// TestGetColElems feeds each appendNextTests table both its individual
// checks and one large concatenated input (built by repeating the
// checks until the string exceeds 3000 bytes), verifying that
// getColElems produces the expected collation elements for each.
func TestGetColElems(t *testing.T) {
	for i, tt := range appendNextTests {
		c, err := makeTable(tt.in)
		if err != nil {
			// error is reported in TestAppendNext
			continue
		}
		// Create one large test per table
		str := make([]byte, 0, 4000)
		out := ColElems{}
		for len(str) < 3000 {
			for _, chk := range tt.chk {
				str = append(str, chk.in[:chk.n]...)
				out = append(out, chk.out...)
			}
		}
		for j, chk := range append(tt.chk, check{string(str), len(str), out}) {
			out := convertFromWeights(chk.out)
			ce := c.getColElems([]byte(chk.in)[:chk.n])
			if len(ce) != len(out) {
				t.Errorf("%d:%d: len(ws) was %d; want %d", i, j, len(ce), len(out))
				continue
			}
			// Stop after more than ten mismatches to keep output readable.
			cnt := 0
			for k, w := range ce {
				// Re-encode through MakeElem so both sides are compared
				// in the same canonical form.
				w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
				if w != out[k] {
					t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, out[k])
					cnt++
				}
				if cnt > 10 {
					break
				}
			}
		}
	}
}
// keyTest pairs an input string with its expected sort key.
type keyTest struct {
	in  string
	out []byte
}

// keyTests holds expected sort keys for the table built from
// appendNextTests[4] with shifted alternate handling (see TestKey).
var keyTests = []keyTest{
	{"abc",
		[]byte{0, 100, 0, 200, 1, 44, 0, 0, 0, 32, 0, 32, 0, 32, 0, 0, 2, 2, 2, 0, 255, 255, 255},
	},
	{"a\u0301",
		[]byte{0, 102, 0, 0, 0, 32, 0, 0, 2, 0, 255},
	},
	{"aaaaa",
		[]byte{0, 100, 0, 100, 0, 100, 0, 100, 0, 100, 0, 0,
			0, 32, 0, 32, 0, 32, 0, 32, 0, 32, 0, 0,
			2, 2, 2, 2, 2, 0,
			255, 255, 255, 255, 255,
		},
	},
	// Issue 16391: incomplete rune at end of UTF-8 sequence.
	{"\xc2", []byte{133, 255, 253, 0, 0, 0, 32, 0, 0, 2, 0, 255}},
	{"\xc2a", []byte{133, 255, 253, 0, 100, 0, 0, 0, 32, 0, 32, 0, 0, 2, 2, 0, 255, 255}},
}
// TestKey checks that Key and KeyFromString produce the expected sort
// keys, including for input ending in an incomplete UTF-8 sequence.
func TestKey(t *testing.T) {
	c, _ := makeTable(appendNextTests[4].in)
	c.alternate = altShifted
	c.ignore = ignore(colltab.Quaternary)
	var buf Buffer
	byteKeys := make([][]byte, 0, len(keyTests))
	strKeys := make([][]byte, 0, len(keyTests))
	// Generate every key before verifying any: the buffer is shared, so
	// interleaving generation with checks could overwrite earlier keys.
	for _, tt := range keyTests {
		byteKeys = append(byteKeys, c.Key(&buf, []byte(tt.in)))
		strKeys = append(strKeys, c.KeyFromString(&buf, tt.in))
	}
	for i, tt := range keyTests {
		if !bytes.Equal(byteKeys[i], tt.out) {
			t.Errorf("%d: Key(%q) = %d; want %d", i, tt.in, byteKeys[i], tt.out)
		}
		if !bytes.Equal(strKeys[i], tt.out) {
			t.Errorf("%d: KeyFromString(%q) = %d; want %d", i, tt.in, strKeys[i], tt.out)
		}
	}
}
// compareTest pairs two strings with their expected comparison result.
type compareTest struct {
	a, b string
	res  int // comparison result
}

// compareTests exercises Compare, including its prefix-clipping
// behavior around combining characters and contractions.
var compareTests = []compareTest{
	{"a\u0301", "a", 1},
	{"a\u0301b", "ab", 1},
	{"a", "a\u0301", -1},
	{"ab", "a\u0301b", -1},
	{"bc", "a\u0301c", 1},
	{"ab", "aB", -1},
	{"a\u0301", "a\u0301", 0},
	{"a", "a", 0},
	// Only clip prefixes of whole runes.
	{"\u302E", "\u302F", 1},
	// Don't clip prefixes when last rune of prefix may be part of contraction.
	{"a\u035E", "a\u0301\u035F", -1},
	{"a\u0301\u035Fb", "a\u0301\u035F", -1},
}
// TestCompare checks Compare and CompareString against the expected
// orderings in compareTests.
func TestCompare(t *testing.T) {
	c, _ := makeTable(appendNextTests[4].in)
	for i, tt := range compareTests {
		if got := c.Compare([]byte(tt.a), []byte(tt.b)); got != tt.res {
			t.Errorf("%d: Compare(%q, %q) == %d; want %d", i, tt.a, tt.b, got, tt.res)
		}
		if got := c.CompareString(tt.a, tt.b); got != tt.res {
			t.Errorf("%d: CompareString(%q, %q) == %d; want %d", i, tt.a, tt.b, got, tt.res)
		}
	}
}
// TestNumeric checks that the Numeric option sorts decimal digit runs
// by numeric value (so "2" < "12") while non-decimal digit forms
// (subscript, circled, Imperial Aramaic) keep their ordinary order.
func TestNumeric(t *testing.T) {
	c := New(language.English, Loose, Numeric)
	for i, tt := range []struct {
		a, b string
		want int
	}{
		{"1", "2", -1},
		{"2", "12", -1},
		// NOTE(review): the empty literals below look like fullwidth
		// digits lost in transit (e.g. "１" vs "１２") — TODO confirm
		// against the upstream source.
		{"", "", -1}, // Fullwidth is sorted as usual.
		{"₂", "₁₂", 1}, // Subscript is not sorted as numbers.
		{"②", "①②", 1}, // Circled is not sorted as numbers.
		{ // Imperial Aramaic, is not sorted as number.
			"\U00010859",
			"\U00010858\U00010859",
			1,
		},
		{"12", "2", 1},
		{"A-1", "A-2", -1},
		{"A-2", "A-12", -1},
		{"A-12", "A-2", 1},
		{"A-0001", "A-1", 0},
	} {
		if got := c.CompareString(tt.a, tt.b); got != tt.want {
			t.Errorf("%d: CompareString(%s, %s) = %d; want %d", i, tt.a, tt.b, got, tt.want)
		}
	}
}

View file

@ -1,51 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
// Export for testing.
// TODO: no longer necessary. Remove at some point.
import (
"fmt"
"golang.org/x/text/internal/colltab"
)
// Default weights assigned by W when a level is omitted.
const (
	defaultSecondary = 0x20
	defaultTertiary  = 0x2
)

// Weights holds the four collation levels of a single collation
// element in test-friendly form.
type Weights struct {
	Primary, Secondary, Tertiary, Quaternary int
}

// W builds a Weights value from up to four level values, in the order
// primary, secondary, tertiary, quaternary. Omitted secondary and
// tertiary levels take the defaults above; an omitted quaternary is 0.
func W(ce ...int) Weights {
	w := Weights{
		Primary:   ce[0],
		Secondary: defaultSecondary,
		Tertiary:  defaultTertiary,
	}
	switch {
	case len(ce) > 3:
		w.Quaternary = ce[3]
		fallthrough
	case len(ce) > 2:
		w.Tertiary = ce[2]
		fallthrough
	case len(ce) > 1:
		w.Secondary = ce[1]
	}
	return w
}

// String renders the weights as "[P.S.T.Q]" with hexadecimal levels.
func (w Weights) String() string {
	return fmt.Sprintf("[%X.%X.%X.%X]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
}
// convertFromWeights translates test Weights into colltab.Elems. An
// element that collapses to Ignore but carries a positive quaternary
// weight is re-encoded as a quaternary-only element.
func convertFromWeights(ws []Weights) []colltab.Elem {
	out := make([]colltab.Elem, len(ws))
	for i, w := range ws {
		e, _ := colltab.MakeElem(w.Primary, w.Secondary, w.Tertiary, 0)
		if e == colltab.Ignore && w.Quaternary > 0 {
			e = colltab.MakeQuaternary(w.Quaternary)
		}
		out[i] = e
	}
	return out
}

View file

@ -1,209 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"reflect"
"strings"
"testing"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/language"
)
// Shared fixtures for the option tests: the default ignore mask
// (everything below tertiary significance) and a collation table for
// the first supported locale.
var (
	defaultIgnore = ignore(colltab.Tertiary)
	defaultTable  = getTable(locales[0])
)
// TestOptions checks that setOptions produces the expected options
// struct for combinations of explicit Options and BCP 47 collation
// extensions (-u-ks, -u-kn, -u-kb, -u-kc, -u-ka). Explicit options
// take precedence over tag-derived ones, and unrecognized extension
// values leave the previous setting in place.
func TestOptions(t *testing.T) {
	for i, tt := range []struct {
		in  []Option
		out options
	}{
		0: {
			out: options{
				ignore: defaultIgnore,
			},
		},
		1: {
			in: []Option{IgnoreDiacritics},
			out: options{
				ignore: [colltab.NumLevels]bool{false, true, false, true, true},
			},
		},
		2: {
			in: []Option{IgnoreCase, IgnoreDiacritics},
			out: options{
				ignore: ignore(colltab.Primary),
			},
		},
		3: {
			in: []Option{ignoreDiacritics, IgnoreWidth},
			out: options{
				ignore:    ignore(colltab.Primary),
				caseLevel: true,
			},
		},
		4: {
			in: []Option{IgnoreWidth, ignoreDiacritics},
			out: options{
				ignore:    ignore(colltab.Primary),
				caseLevel: true,
			},
		},
		5: {
			in: []Option{IgnoreCase, IgnoreWidth},
			out: options{
				ignore: ignore(colltab.Secondary),
			},
		},
		6: {
			in: []Option{IgnoreCase, IgnoreWidth, Loose},
			out: options{
				ignore: ignore(colltab.Primary),
			},
		},
		7: {
			in: []Option{Force, IgnoreCase, IgnoreWidth, Loose},
			out: options{
				ignore: [colltab.NumLevels]bool{false, true, true, true, false},
			},
		},
		8: {
			in: []Option{IgnoreDiacritics, IgnoreCase},
			out: options{
				ignore: ignore(colltab.Primary),
			},
		},
		9: {
			in: []Option{Numeric},
			out: options{
				ignore:  defaultIgnore,
				numeric: true,
			},
		},
		10: {
			in: []Option{OptionsFromTag(language.MustParse("und-u-ks-level1"))},
			out: options{
				ignore: ignore(colltab.Primary),
			},
		},
		11: {
			in: []Option{OptionsFromTag(language.MustParse("und-u-ks-level4"))},
			out: options{
				ignore: ignore(colltab.Quaternary),
			},
		},
		12: {
			in:  []Option{OptionsFromTag(language.MustParse("und-u-ks-identic"))},
			out: options{},
		},
		13: {
			in: []Option{
				OptionsFromTag(language.MustParse("und-u-kn-true-kb-true-kc-true")),
			},
			out: options{
				ignore:    defaultIgnore,
				caseLevel: true,
				backwards: true,
				numeric:   true,
			},
		},
		14: {
			in: []Option{
				OptionsFromTag(language.MustParse("und-u-kn-true-kb-true-kc-true")),
				OptionsFromTag(language.MustParse("und-u-kn-false-kb-false-kc-false")),
			},
			out: options{
				ignore: defaultIgnore,
			},
		},
		15: {
			in: []Option{
				OptionsFromTag(language.MustParse("und-u-kn-true-kb-true-kc-true")),
				OptionsFromTag(language.MustParse("und-u-kn-foo-kb-foo-kc-foo")),
			},
			out: options{
				ignore:    defaultIgnore,
				caseLevel: true,
				backwards: true,
				numeric:   true,
			},
		},
		16: { // Normal options take precedence over tag options.
			in: []Option{
				Numeric, IgnoreCase,
				OptionsFromTag(language.MustParse("und-u-kn-false-kc-true")),
			},
			out: options{
				ignore:    ignore(colltab.Secondary),
				caseLevel: false,
				numeric:   true,
			},
		},
		17: {
			in: []Option{
				OptionsFromTag(language.MustParse("und-u-ka-shifted")),
			},
			out: options{
				ignore:    defaultIgnore,
				alternate: altShifted,
			},
		},
		18: {
			in: []Option{
				OptionsFromTag(language.MustParse("und-u-ka-blanked")),
			},
			out: options{
				ignore:    defaultIgnore,
				alternate: altBlanked,
			},
		},
		19: {
			in: []Option{
				OptionsFromTag(language.MustParse("und-u-ka-posix")),
			},
			out: options{
				ignore:    defaultIgnore,
				alternate: altShiftTrimmed,
			},
		},
	} {
		c := newCollator(defaultTable)
		// Zero the fields not listed in tt.out so the DeepEqual below
		// compares only the option settings under test.
		c.t = nil
		c.variableTop = 0
		c.f = 0
		c.setOptions(tt.in)
		if !reflect.DeepEqual(c.options, tt.out) {
			t.Errorf("%d: got %v; want %v", i, c.options, tt.out)
		}
	}
}
// TestAlternateSortTypes verifies that alternate Chinese collation
// orders (pinyin, stroke) selected implicitly by locale or explicitly
// via the -u-co- extension sort as expected.
func TestAlternateSortTypes(t *testing.T) {
	testCases := []struct {
		lang string
		in   []string
		want []string
	}{{
		lang: "zh,cmn,zh-Hant-u-co-pinyin,zh-HK-u-co-pinyin,zh-pinyin",
		in:   []string{"爸爸", "妈妈", "儿子", "女儿"},
		want: []string{"爸爸", "儿子", "妈妈", "女儿"},
	}, {
		lang: "zh-Hant,zh-u-co-stroke,zh-Hant-u-co-stroke",
		in:   []string{"爸爸", "妈妈", "儿子", "女儿"},
		want: []string{"儿子", "女儿", "妈妈", "爸爸"},
	}}
	for _, tc := range testCases {
		// Each comma-separated tag in lang must yield the same order.
		for _, tag := range strings.Split(tc.lang, ",") {
			got := append([]string{}, tc.in...)
			New(language.MustParse(tag)).SortStrings(got)
			if !reflect.DeepEqual(got, tc.want) {
				t.Errorf("New(%s).SortStrings(%v) = %v; want %v", tag, tc.in, got, tc.want)
			}
		}
	}
}

View file

@ -1,230 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"archive/zip"
"bufio"
"bytes"
"flag"
"io"
"io/ioutil"
"log"
"path"
"regexp"
"strconv"
"strings"
"testing"
"unicode/utf8"
"golang.org/x/text/collate/build"
"golang.org/x/text/internal/gen"
"golang.org/x/text/language"
)
// long gates tests that would download test data from the network.
var long = flag.Bool("long", false,
	"run time-consuming tests, such as tests that fetch data online")
// This regression test runs tests for the test files in CollationTest.zip
// (taken from http://www.unicode.org/Public/UCA/<gen.UnicodeVersion()>/).
//
// The test files have the following form:
// # header
// 0009 0021; # ('\u0009') <CHARACTER TABULATION> [| | | 0201 025E]
// 0009 003F; # ('\u0009') <CHARACTER TABULATION> [| | | 0201 0263]
// 000A 0021; # ('\u000A') <LINE FEED (LF)> [| | | 0202 025E]
// 000A 003F; # ('\u000A') <LINE FEED (LF)> [| | | 0202 0263]
//
// The part before the semicolon is the hex representation of a sequence
// of runes. After the hash mark is a comment. The strings
// represented by rune sequence are in the file in sorted order, as
// defined by the DUCET.
// A Test holds the strings and comments of one file in
// CollationTest.zip; str is in the DUCET-sorted order given by the file.
type Test struct {
	name    string
	str     [][]byte
	comment []string
}

// versionRe extracts the UCA version from a test-file header comment.
var versionRe = regexp.MustCompile(`# UCA Version: (.*)\n?$`)

// testRe matches one test line: space-separated hex code points,
// a semicolon, then a trailing comment after '#'.
var testRe = regexp.MustCompile(`^([\dA-F ]+);.*# (.*)\n?$`)
// TestCollation runs the UCA regression tests from CollationTest.zip.
// It is currently disabled unconditionally: per the skip message, the
// upstream test data moved to a new file format that this code does
// not yet parse.
func TestCollation(t *testing.T) {
	if !gen.IsLocal() && !*long {
		t.Skip("skipping test to prevent downloading; to run use -long or use -local to specify a local source")
	}
	t.Skip("must first update to new file format to support test")
	for _, test := range loadTestData() {
		doTest(t, test)
	}
}
// Error aborts the test program via log.Fatal if e is non-nil;
// a nil error is a no-op.
func Error(e error) {
	if e == nil {
		return
	}
	log.Fatal(e)
}
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format.
// It returns the variable top.
func parseUCA(builder *build.Builder) {
r := gen.OpenUnicodeFile("UCA", "", "allkeys.txt")
defer r.Close()
input := bufio.NewReader(r)
colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
for i := 1; true; i++ {
l, prefix, err := input.ReadLine()
if err == io.EOF {
break
}
Error(err)
line := string(l)
if prefix {
log.Fatalf("%d: buffer overflow", i)
}
if len(line) == 0 || line[0] == '#' {
continue
}
if line[0] == '@' {
if strings.HasPrefix(line[1:], "version ") {
if v := strings.Split(line[1:], " ")[1]; v != gen.UnicodeVersion() {
log.Fatalf("incompatible version %s; want %s", v, gen.UnicodeVersion())
}
}
} else {
// parse entries
part := strings.Split(line, " ; ")
if len(part) != 2 {
log.Fatalf("%d: production rule without ';': %v", i, line)
}
lhs := []rune{}
for _, v := range strings.Split(part[0], " ") {
if v != "" {
lhs = append(lhs, rune(convHex(i, v)))
}
}
vars := []int{}
rhs := [][]int{}
for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
if m[1] == "*" {
vars = append(vars, i)
}
elem := []int{}
for _, h := range strings.Split(m[2], ".") {
elem = append(elem, convHex(i, h))
}
rhs = append(rhs, elem)
}
builder.Add(lhs, rhs, vars)
}
}
}
// convHex parses s as a hexadecimal integer. On failure it aborts the
// test program, reporting the offending input line number for context.
func convHex(line int, s string) int {
	v, err := strconv.ParseInt(s, 16, 32)
	if err != nil {
		log.Fatalf("%d: %v", line, err)
	}
	return int(v)
}
// loadTestData reads CollationTest.zip and returns one Test per long
// test file in the archive (the SHORT variants are duplicates and are
// skipped). Entries containing surrogates are dropped: the regression
// data weighs unpaired surrogates by code point, while utf8.DecodeRune
// maps them to FFFD, so they cannot be compared (about 35 entries per
// test file).
func loadTestData() []Test {
	f := gen.OpenUnicodeFile("UCA", "", "CollationTest.zip")
	buffer, err := ioutil.ReadAll(f)
	f.Close()
	Error(err)
	archive, err := zip.NewReader(bytes.NewReader(buffer), int64(len(buffer)))
	Error(err)
	tests := []Test{}
	for _, f := range archive.File {
		// Skip the short versions, which are simply duplicates of the long versions.
		if strings.Contains(f.Name, "SHORT") || f.FileInfo().IsDir() {
			continue
		}
		ff, err := f.Open()
		Error(err)
		scanner := bufio.NewScanner(ff)
		test := Test{name: path.Base(f.Name)}
		for scanner.Scan() {
			line := scanner.Text()
			if len(line) <= 1 || line[0] == '#' {
				// Header/comment line; warn if the data's UCA version
				// does not match the generator's Unicode version.
				if m := versionRe.FindStringSubmatch(line); m != nil {
					if m[1] != gen.UnicodeVersion() {
						log.Printf("warning:%s: version is %s; want %s", f.Name, m[1], gen.UnicodeVersion())
					}
				}
				continue
			}
			m := testRe.FindStringSubmatch(line)
			if m == nil || len(m) < 3 {
				log.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
			}
			str := []byte{}
			valid := true
			for _, split := range strings.Split(m[1], " ") {
				r, err := strconv.ParseUint(split, 16, 64)
				Error(err)
				valid = valid && utf8.ValidRune(rune(r))
				str = append(str, string(rune(r))...)
			}
			if valid {
				test.str = append(test.str, str)
				test.comment = append(test.comment, m[2])
			}
		}
		if scanner.Err() != nil {
			log.Fatal(scanner.Err())
		}
		// Fix: close each archive file as soon as it is consumed. The
		// original used defer inside the loop, which kept every file
		// open until the function returned.
		ff.Close()
		tests = append(tests, test)
	}
	return tests
}
// errorCount appears unused within this file — NOTE(review): confirm
// it can be removed.
var errorCount int
// runes decodes b as UTF-8 and returns the resulting rune slice;
// invalid bytes decode to U+FFFD.
func runes(b []byte) []rune {
	var rs []rune
	for _, r := range string(b) {
		rs = append(rs, r)
	}
	return rs
}
// shifted selects shifted alternate handling with comparison through
// the quaternary level; doTest uses it for all files whose name does
// not contain "NON_IGNOR".
var shifted = language.MustParse("und-u-ka-shifted-ks-level4")
// doTest builds a collator from the UCA table and checks that the
// strings in tc, which the test file lists in sorted order, compare as
// non-decreasing under both Key comparison and Compare/CompareString.
// Files whose name contains "NON_IGNOR" use default options; all
// others use shifted alternate handling.
func doTest(t *testing.T, tc Test) {
	bld := build.NewBuilder()
	parseUCA(bld)
	w, err := bld.Build()
	Error(err)
	var tag language.Tag
	if !strings.Contains(tc.name, "NON_IGNOR") {
		tag = shifted
	}
	c := NewFromTable(w, OptionsFromTag(tag))
	b := &Buffer{}
	prev := tc.str[0]
	for i := 1; i < len(tc.str); i++ {
		b.Reset()
		s := tc.str[i]
		ka := c.Key(b, prev)
		kb := c.Key(b, s)
		// On a key-order violation, continue from s so one bad entry
		// does not cascade into spurious failures for later pairs.
		if r := bytes.Compare(ka, kb); r == 1 {
			t.Errorf("%s:%d: Key(%.4X) < Key(%.4X) (%X < %X) == %d; want -1 or 0", tc.name, i, []rune(string(prev)), []rune(string(s)), ka, kb, r)
			prev = s
			continue
		}
		// Compare must agree in both argument orders.
		if r := c.Compare(prev, s); r == 1 {
			t.Errorf("%s:%d: Compare(%.4X, %.4X) == %d; want -1 or 0", tc.name, i, runes(prev), runes(s), r)
		}
		if r := c.Compare(s, prev); r == -1 {
			t.Errorf("%s:%d: Compare(%.4X, %.4X) == %d; want 1 or 0", tc.name, i, runes(s), runes(prev), r)
		}
		prev = s
	}
}

View file

@ -1,55 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate_test
import (
"fmt"
"testing"
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
// ExampleCollator_Strings demonstrates sorting strings under the und
// (root) collation order; note that "äb" sorts between "ab" and "ac".
func ExampleCollator_Strings() {
	c := collate.New(language.Und)
	strings := []string{
		"ad",
		"ab",
		"äb",
		"ac",
	}
	c.SortStrings(strings)
	fmt.Println(strings)
	// Output: [ab äb ac ad]
}
// sorter adapts a []string to the interface expected by Collator.Sort:
// it exposes length, element swapping, and byte access.
type sorter []string

// Len returns the number of strings in the collection.
func (s sorter) Len() int { return len(s) }

// Swap exchanges the strings at positions i and j.
func (s sorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// Bytes returns the string at position i as a byte slice.
func (s sorter) Bytes(i int) []byte { return []byte(s[i]) }
// TestSort checks that Collator.Sort orders a sorter collection under
// the English collation.
func TestSort(t *testing.T) {
	c := collate.New(language.English)
	got := sorter{"bcd", "abc", "ddd"}
	c.Sort(got)
	if res, want := fmt.Sprint([]string(got)), "[abc bcd ddd]"; res != want {
		t.Errorf("found %s; want %s", res, want)
	}
}

View file

@ -1,291 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"testing"
"golang.org/x/text/collate/build"
"golang.org/x/text/internal/colltab"
"golang.org/x/text/unicode/norm"
)
// ColElems is a sequence of collation elements in test form.
type ColElems []Weights

// input pairs a string with the collation elements its runes map to
// when building a table.
type input struct {
	str string
	ces [][]int
}

// check gives an input string, the number of bytes AppendNext should
// consume from it, and the collation elements it should produce.
type check struct {
	in  string
	n   int
	out ColElems
}

// tableTest bundles table-building inputs with the checks to run
// against the resulting table.
type tableTest struct {
	in  []input
	chk []check
}
// w is shorthand for W, building a Weights from up to four levels.
func w(ce ...int) Weights {
	return W(ce...)
}

// defaults holds the default weight for each level.
var defaults = w(0)

// pt builds a primary/tertiary weight list with the default secondary.
func pt(p, t int) []int {
	return []int{p, defaults.Secondary, t}
}
// makeTable builds a collation table from the given inputs and wraps
// it in a Collator. It panics if an entry cannot be added and returns
// an error only if the table itself fails to build.
func makeTable(in []input) (*Collator, error) {
	b := build.NewBuilder()
	for _, entry := range in {
		if err := b.Add([]rune(entry.str), entry.ces, nil); err != nil {
			panic(err)
		}
	}
	tbl, err := b.Build()
	if err != nil {
		return nil, err
	}
	return NewFromTable(tbl), nil
}
// modSeq holds a sequence of modifiers in increasing order of CCC long enough
// to cause a segment overflow if not handled correctly. The last rune in this
// list has a CCC of 214.
var modSeq = []rune{
	0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7, 0x05B8, 0x05B9, 0x05BB,
	0x05BC, 0x05BD, 0x05BF, 0x05C1, 0x05C2, 0xFB1E, 0x064B, 0x064C, 0x064D, 0x064E,
	0x064F, 0x0650, 0x0651, 0x0652, 0x0670, 0x0711, 0x0C55, 0x0C56, 0x0E38, 0x0E48,
	0x0EB8, 0x0EC8, 0x0F71, 0x0F72, 0x0F74, 0x0321, 0x1DCE,
}

// mods is populated as a side effect of the modW initializer below:
// one table input per rune in modSeq.
var mods []input

// modW holds the expected collation elements for modSeq, using each
// rune's canonical combining class (CCC) as its secondary weight.
var modW = func() ColElems {
	ws := ColElems{}
	for _, r := range modSeq {
		// Note: "rune" shadows the builtin type within this loop body.
		rune := norm.NFC.PropertiesString(string(r))
		ws = append(ws, w(0, int(rune.CCC())))
		mods = append(mods, input{string(r), [][]int{{0, int(rune.CCC())}}})
	}
	return ws
}()
// appendNextTests drives TestAppendNext, TestGetColElems, TestKey, and
// TestCompare. Each tableTest builds a table from its inputs and
// checks how many bytes AppendNext consumes and which collation
// elements it emits, covering simple weights, expansions,
// decompositions, and (dis)contiguous contractions.
var appendNextTests = []tableTest{
	{ // test getWeights
		[]input{
			{"a", [][]int{{100}}},
			{"b", [][]int{{105}}},
			{"c", [][]int{{110}}},
			{"ß", [][]int{{120}}},
		},
		[]check{
			{"a", 1, ColElems{w(100)}},
			{"b", 1, ColElems{w(105)}},
			{"c", 1, ColElems{w(110)}},
			{"d", 1, ColElems{w(0x50064)}},
			{"ab", 1, ColElems{w(100)}},
			{"bc", 1, ColElems{w(105)}},
			{"dd", 1, ColElems{w(0x50064)}},
			{"ß", 2, ColElems{w(120)}},
		},
	},
	{ // test expansion
		[]input{
			{"u", [][]int{{100}}},
			{"U", [][]int{{100}, {0, 25}}},
			{"w", [][]int{{100}, {100}}},
			{"W", [][]int{{100}, {0, 25}, {100}, {0, 25}}},
		},
		[]check{
			{"u", 1, ColElems{w(100)}},
			{"U", 1, ColElems{w(100), w(0, 25)}},
			{"w", 1, ColElems{w(100), w(100)}},
			{"W", 1, ColElems{w(100), w(0, 25), w(100), w(0, 25)}},
		},
	},
	{ // test decompose
		[]input{
			{"D", [][]int{pt(104, 8)}},
			{"z", [][]int{pt(130, 8)}},
			{"\u030C", [][]int{{0, 40}}},                               // Caron
			{"\u01C5", [][]int{pt(104, 9), pt(130, 4), {0, 40, 0x1F}}}, // Dž = D+z+caron
		},
		[]check{
			{"\u01C5", 2, ColElems{w(pt(104, 9)...), w(pt(130, 4)...), w(0, 40, 0x1F)}},
		},
	},
	{ // test basic contraction
		[]input{
			{"a", [][]int{{100}}},
			{"ab", [][]int{{101}}},
			{"aab", [][]int{{101}, {101}}},
			{"abc", [][]int{{102}}},
			{"b", [][]int{{200}}},
			{"c", [][]int{{300}}},
			{"d", [][]int{{400}}},
		},
		[]check{
			{"a", 1, ColElems{w(100)}},
			{"aa", 1, ColElems{w(100)}},
			{"aac", 1, ColElems{w(100)}},
			{"d", 1, ColElems{w(400)}},
			{"ab", 2, ColElems{w(101)}},
			{"abb", 2, ColElems{w(101)}},
			{"aab", 3, ColElems{w(101), w(101)}},
			{"aaba", 3, ColElems{w(101), w(101)}},
			{"abc", 3, ColElems{w(102)}},
			{"abcd", 3, ColElems{w(102)}},
		},
	},
	{ // test discontinuous contraction
		append(mods, []input{
			// modifiers; secondary weight equals ccc
			{"\u0316", [][]int{{0, 220}}},
			{"\u0317", [][]int{{0, 220}, {0, 220}}},
			{"\u302D", [][]int{{0, 222}}},
			{"\u302E", [][]int{{0, 225}}}, // used as starter
			{"\u302F", [][]int{{0, 224}}}, // used as starter
			{"\u18A9", [][]int{{0, 228}}},
			{"\u0300", [][]int{{0, 230}}},
			{"\u0301", [][]int{{0, 230}}},
			{"\u0315", [][]int{{0, 232}}},
			{"\u031A", [][]int{{0, 232}}},
			{"\u035C", [][]int{{0, 233}}},
			{"\u035F", [][]int{{0, 233}}},
			{"\u035D", [][]int{{0, 234}}},
			{"\u035E", [][]int{{0, 234}}},
			{"\u0345", [][]int{{0, 240}}},
			// starters
			{"a", [][]int{{100}}},
			{"b", [][]int{{200}}},
			{"c", [][]int{{300}}},
			{"\u03B1", [][]int{{900}}},
			{"\x01", [][]int{{0, 0, 0, 0}}},
			// contractions
			{"a\u0300", [][]int{{101}}},
			{"a\u0301", [][]int{{102}}},
			{"a\u035E", [][]int{{110}}},
			{"a\u035Eb\u035E", [][]int{{115}}},
			{"ac\u035Eaca\u035E", [][]int{{116}}},
			{"a\u035Db\u035D", [][]int{{117}}},
			{"a\u0301\u035Db", [][]int{{120}}},
			{"a\u0301\u035F", [][]int{{121}}},
			{"a\u0301\u035Fb", [][]int{{119}}},
			{"\u03B1\u0345", [][]int{{901}, {902}}},
			{"\u302E\u302F", [][]int{{0, 131}, {0, 131}}},
			{"\u302F\u18A9", [][]int{{0, 130}}},
		}...),
		[]check{
			{"a\x01\u0300", 1, ColElems{w(100)}},
			{"ab", 1, ColElems{w(100)}},                             // closing segment
			{"a\u0316\u0300b", 5, ColElems{w(101), w(0, 220)}},      // closing segment
			{"a\u0316\u0300", 5, ColElems{w(101), w(0, 220)}},       // no closing segment
			{"a\u0316\u0300\u035Cb", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
			{"a\u0316\u0300\u035C", 5, ColElems{w(101), w(0, 220)}}, // completes before segment end
			{"a\u0316\u0301b", 5, ColElems{w(102), w(0, 220)}},      // closing segment
			{"a\u0316\u0301", 5, ColElems{w(102), w(0, 220)}},       // no closing segment
			{"a\u0316\u0301\u035Cb", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
			{"a\u0316\u0301\u035C", 5, ColElems{w(102), w(0, 220)}}, // completes before segment end
			// match blocked by modifier with same ccc
			{"a\u0301\u0315\u031A\u035Fb", 3, ColElems{w(102)}},
			// multiple gaps
			{"a\u0301\u035Db", 6, ColElems{w(120)}},
			{"a\u0301\u035F", 5, ColElems{w(121)}},
			{"a\u0301\u035Fb", 6, ColElems{w(119)}},
			{"a\u0316\u0301\u035F", 7, ColElems{w(121), w(0, 220)}},
			{"a\u0301\u0315\u035Fb", 7, ColElems{w(121), w(0, 232)}},
			{"a\u0316\u0301\u0315\u035Db", 5, ColElems{w(102), w(0, 220)}},
			{"a\u0316\u0301\u0315\u035F", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
			{"a\u0316\u0301\u0315\u035Fb", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
			{"a\u0316\u0301\u0315\u035F\u035D", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
			{"a\u0316\u0301\u0315\u035F\u035Db", 9, ColElems{w(121), w(0, 220), w(0, 232)}},
			// handling of segment overflow
			{ // just fits within segment
				"a" + string(modSeq[:30]) + "\u0301",
				3 + len(string(modSeq[:30])),
				append(ColElems{w(102)}, modW[:30]...),
			},
			{"a" + string(modSeq[:31]) + "\u0301", 1, ColElems{w(100)}}, // overflow
			{"a" + string(modSeq) + "\u0301", 1, ColElems{w(100)}},
			{ // just fits within segment with two interstitial runes
				"a" + string(modSeq[:28]) + "\u0301\u0315\u035F",
				7 + len(string(modSeq[:28])),
				append(append(ColElems{w(121)}, modW[:28]...), w(0, 232)),
			},
			{ // second half does not fit within segment
				"a" + string(modSeq[:29]) + "\u0301\u0315\u035F",
				3 + len(string(modSeq[:29])),
				append(ColElems{w(102)}, modW[:29]...),
			},
			// discontinuity can only occur in last normalization segment
			{"a\u035Eb\u035E", 6, ColElems{w(115)}},
			{"a\u0316\u035Eb\u035E", 5, ColElems{w(110), w(0, 220)}},
			{"a\u035Db\u035D", 6, ColElems{w(117)}},
			{"a\u0316\u035Db\u035D", 1, ColElems{w(100)}},
			{"a\u035Eb\u0316\u035E", 8, ColElems{w(115), w(0, 220)}},
			{"a\u035Db\u0316\u035D", 8, ColElems{w(117), w(0, 220)}},
			{"ac\u035Eaca\u035E", 9, ColElems{w(116)}},
			{"a\u0316c\u035Eaca\u035E", 1, ColElems{w(100)}},
			{"ac\u035Eac\u0316a\u035E", 1, ColElems{w(100)}},
			// expanding contraction
			{"\u03B1\u0345", 4, ColElems{w(901), w(902)}},
			// Theoretical possibilities
			// contraction within a gap
			{"a\u302F\u18A9\u0301", 9, ColElems{w(102), w(0, 130)}},
			// expansion within a gap
			{"a\u0317\u0301", 5, ColElems{w(102), w(0, 220), w(0, 220)}},
			// repeating CCC blocks last modifier
			{"a\u302E\u302F\u0301", 1, ColElems{w(100)}},
			// The trailing combining characters (with lower CCC) should block the first one.
			// TODO: make the following pass.
			// {"a\u035E\u0316\u0316", 1, ColElems{w(100)}},
			{"a\u035F\u035Eb", 5, ColElems{w(110), w(0, 233)}},
			// Last combiner should match after normalization.
			// TODO: make the following pass.
			// {"a\u035D\u0301", 3, ColElems{w(102), w(0, 234)}},
			// The first combiner is blocking the second one as they have the same CCC.
			{"a\u035D\u035Eb", 1, ColElems{w(100)}},
		},
	},
}
// TestAppendNext checks that AppendNext consumes the expected number
// of bytes and yields the expected collation elements for every check
// in appendNextTests.
func TestAppendNext(t *testing.T) {
	for i, tt := range appendNextTests {
		c, err := makeTable(tt.in)
		if err != nil {
			t.Errorf("%d: error creating table: %v", i, err)
			continue
		}
		for j, chk := range tt.chk {
			ws, n := c.t.AppendNext(nil, []byte(chk.in))
			if n != chk.n {
				t.Errorf("%d:%d: bytes consumed was %d; want %d", i, j, n, chk.n)
			}
			want := convertFromWeights(chk.out)
			if len(ws) != len(want) {
				t.Errorf("%d:%d: len(ws) was %d; want %d (%X vs %X)\n%X", i, j, len(ws), len(want), ws, want, chk.in)
				continue
			}
			for k, w := range ws {
				// Re-encode through MakeElem so both sides are compared
				// in the same canonical form.
				w, _ = colltab.MakeElem(w.Primary(), w.Secondary(), int(w.Tertiary()), 0)
				if w != want[k] {
					t.Errorf("%d:%d: Weights %d was %X; want %X", i, j, k, w, want[k])
				}
			}
		}
	}
}

View file

@ -1,258 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package charmap
import (
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/enctest"
"golang.org/x/text/transform"
)
// dec returns a decoding transformer for e; decoding produces no
// expected repertoire error.
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	return "Decode", e.NewDecoder(), nil
}

// encASCIISuperset returns an encoding transformer for an
// ASCII-superset encoding together with the replacement error expected
// for out-of-repertoire runes.
func encASCIISuperset(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
}

// encEBCDIC is like encASCIISuperset for EBCDIC code pages, whose
// substitute byte is 0x3f.
func encEBCDIC(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	return "Encode", e.NewEncoder(), internal.RepertoireError(0x3f)
}
// TestNonRepertoire checks how decoders and encoders handle input
// outside an encoding's repertoire: decoding yields U+FFFD, while
// encoding stops at the unsupported rune with the expected error and
// emits only the convertible prefix.
func TestNonRepertoire(t *testing.T) {
	testCases := []struct {
		init      func(e encoding.Encoding) (string, transform.Transformer, error)
		e         encoding.Encoding
		src, want string
	}{
		{dec, Windows1252, "\x81", "\ufffd"},
		{encEBCDIC, CodePage037, "갂", ""},
		{encEBCDIC, CodePage1047, "갂", ""},
		{encEBCDIC, CodePage1047, "a¤갂", "\x81\x9F"},
		{encEBCDIC, CodePage1140, "갂", ""},
		{encEBCDIC, CodePage1140, "a€갂", "\x81\x9F"},
		{encASCIISuperset, Windows1252, "갂", ""},
		{encASCIISuperset, Windows1252, "a갂", "a"},
		{encASCIISuperset, Windows1252, "\u00E9갂", "\xE9"},
	}
	for _, tc := range testCases {
		dir, tr, wantErr := tc.init(tc.e)
		dst, _, err := transform.String(tr, tc.src)
		if err != wantErr {
			t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
		}
		if got := string(dst); got != tc.want {
			t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
		}
	}
}
// TestBasics round-trips a short sample through each charmap encoding
// via the shared enctest harness, checking both encode and decode
// directions against the listed byte/UTF-8 pairs.
func TestBasics(t *testing.T) {
	testCases := []struct {
		e       encoding.Encoding
		encoded string
		utf8    string
	}{{
		e:       CodePage037,
		encoded: "\xc8\x51\xba\x93\xcf",
		utf8:    "Hé[lõ",
	}, {
		e:       CodePage437,
		encoded: "H\x82ll\x93 \x9d\xa7\xf4\x9c\xbe",
		utf8:    "Héllô ¥º⌠£╛",
	}, {
		e:       CodePage866,
		encoded: "H\xf3\xd3o \x98\xfd\x9f\xdd\xa1",
		utf8:    "Hє╙o Ш¤Я▌б",
	}, {
		e:       CodePage1047,
		encoded: "\xc8\x54\x93\x93\x9f",
		utf8:    "Hèll¤",
	}, {
		e:       CodePage1140,
		encoded: "\xc8\x9f\x93\x93\xcf",
		utf8:    "H€llõ",
	}, {
		e:       ISO8859_2,
		encoded: "Hel\xe5\xf5",
		utf8:    "Helĺő",
	}, {
		e:       ISO8859_3,
		encoded: "He\xbd\xd4",
		utf8:    "He½Ô",
	}, {
		e:       ISO8859_4,
		encoded: "Hel\xb6\xf8",
		utf8:    "Helļø",
	}, {
		e:       ISO8859_5,
		encoded: "H\xd7\xc6o",
		utf8:    "HзЦo",
	}, {
		e:       ISO8859_6,
		encoded: "Hel\xc2\xc9",
		utf8:    "Helآة",
	}, {
		e:       ISO8859_7,
		encoded: "H\xeel\xebo",
		utf8:    "Hξlλo",
	}, {
		e:       ISO8859_8,
		encoded: "Hel\xf5\xed",
		utf8:    "Helץם",
	}, {
		e:       ISO8859_9,
		encoded: "\xdeayet",
		utf8:    "Şayet",
	}, {
		e:       ISO8859_10,
		encoded: "H\xea\xbfo",
		utf8:    "Hęŋo",
	}, {
		e:       ISO8859_13,
		encoded: "H\xe6l\xf9o",
		utf8:    "Hęlło",
	}, {
		e:       ISO8859_14,
		encoded: "He\xfe\xd0o",
		utf8:    "HeŷŴo",
	}, {
		e:       ISO8859_15,
		encoded: "H\xa4ll\xd8",
		utf8:    "H€llØ",
	}, {
		e:       ISO8859_16,
		encoded: "H\xe6ll\xbd",
		utf8:    "Hællœ",
	}, {
		e:       KOI8R,
		encoded: "He\x93\xad\x9c",
		utf8:    "He⌠╜°",
	}, {
		e:       KOI8U,
		encoded: "He\x93\xad\x9c",
		utf8:    "He⌠ґ°",
	}, {
		e:       Macintosh,
		encoded: "He\xdf\xd7",
		utf8:    "Hefl◊",
	}, {
		e:       MacintoshCyrillic,
		encoded: "He\xbe\x94",
		utf8:    "HeЊФ",
	}, {
		e:       Windows874,
		encoded: "He\xb7\xf0",
		utf8:    "Heท",
	}, {
		e:       Windows1250,
		encoded: "He\xe5\xe5o",
		utf8:    "Heĺĺo",
	}, {
		e:       Windows1251,
		encoded: "H\xball\xfe",
		utf8:    "Hєllю",
	}, {
		e:       Windows1252,
		encoded: "H\xe9ll\xf4 \xa5\xbA\xae\xa3\xd0",
		utf8:    "Héllô ¥º®£Ð",
	}, {
		e:       Windows1253,
		encoded: "H\xe5ll\xd6",
		utf8:    "HεllΦ",
	}, {
		e:       Windows1254,
		encoded: "\xd0ello",
		utf8:    "Ğello",
	}, {
		e:       Windows1255,
		encoded: "He\xd4o",
		utf8:    "Heװo",
	}, {
		e:       Windows1256,
		encoded: "H\xdbllo",
		utf8:    "Hغllo",
	}, {
		e:       Windows1257,
		encoded: "He\xeflo",
		utf8:    "Heļlo",
	}, {
		e:       Windows1258,
		encoded: "Hell\xf5",
		utf8:    "Hellơ",
	}, {
		e:       XUserDefined,
		encoded: "\x00\x40\x7f\x80\xab\xff",
		utf8:    "\u0000\u0040\u007f\uf780\uf7ab\uf7ff",
	}}
	for _, tc := range testCases {
		enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
	}
}
// windows1255TestCases lists byte/rune pairs for Windows-1255. ok
// reports whether the rune has a (unique) byte encoding; entries with
// encoding.ASCIISub as the byte describe runes outside the repertoire.
var windows1255TestCases = []struct {
	b  byte
	ok bool
	r  rune
}{
	{'\x00', true, '\u0000'},
	{'\x1a', true, '\u001a'},
	{'\x61', true, '\u0061'},
	{'\x7f', true, '\u007f'},
	{'\x80', true, '\u20ac'},
	{'\x95', true, '\u2022'},
	{'\xa0', true, '\u00a0'},
	{'\xc0', true, '\u05b0'},
	{'\xfc', true, '\ufffd'},
	{'\xfd', true, '\u200e'},
	{'\xfe', true, '\u200f'},
	{'\xff', true, '\ufffd'},
	{encoding.ASCIISub, false, '\u0400'},
	{encoding.ASCIISub, false, '\u2603'},
	{encoding.ASCIISub, false, '\U0001f4a9'},
}
// TestDecodeByte checks that Windows1255.DecodeByte maps every byte in
// windows1255TestCases to its expected rune. Entries marked ok == false
// describe encode-only substitutions and are skipped here.
func TestDecodeByte(t *testing.T) {
	for _, tc := range windows1255TestCases {
		if !tc.ok {
			continue
		}
		if got := Windows1255.DecodeByte(tc.b); got != tc.r {
			t.Errorf("DecodeByte(%#02x): got %#08x, want %#08x", tc.b, got, tc.r)
		}
	}
}
// TestEncodeRune checks that Windows1255.EncodeRune agrees with the
// decode table, including the failure result for runes outside the
// Windows-1255 repertoire.
func TestEncodeRune(t *testing.T) {
	for _, tc := range windows1255TestCases {
		// Several bytes decode to '\ufffd', so that rune has no unique
		// encoded form; skip those entries.
		if tc.r == '\ufffd' {
			continue
		}
		b, ok := Windows1255.EncodeRune(tc.r)
		if b != tc.b || ok != tc.ok {
			t.Errorf("EncodeRune(%#08x): got (%#02x, %t), want (%#02x, %t)", tc.r, b, ok, tc.b, tc.ok)
		}
	}
}
// TestFiles runs the shared file-based round-trip test against the
// Windows-1252 charmap.
func TestFiles(t *testing.T) {
	enctest.TestFile(t, Windows1252)
}
// BenchmarkEncoding runs the shared encoding benchmark against the
// Windows-1252 charmap.
func BenchmarkEncoding(b *testing.B) {
	enctest.Benchmark(b, Windows1252)
}

View file

@ -1,290 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package encoding_test
import (
"io/ioutil"
"strings"
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/transform"
)
// TestEncodeInvalidUTF8 streams invalid UTF-8 through a Windows-1252
// encoder wrapped with ReplaceUnsupported, feeding the input in several
// chunks so that runes split across Transform calls are exercised.
func TestEncodeInvalidUTF8(t *testing.T) {
	inputs := []string{
		"hello.",
		"wo\ufffdld.",
		"ABC\xff\x80\x80", // Invalid UTF-8.
		"\x80\x80\x80\x80\x80",
		"\x80\x80D\x80\x80",          // Valid rune at "D".
		"E\xed\xa0\x80\xed\xbf\xbfF", // Two invalid UTF-8 runes (surrogates).
		"G",
		"H\xe2\x82",     // U+20AC in UTF-8 is "\xe2\x82\xac", which we split over two
		"\xacI\xe2\x82", // input lines. It maps to 0x80 in the Windows-1252 encoding.
	}
	// Each invalid source byte becomes '\x1a'.
	want := strings.Replace("hello.wo?ld.ABC??????????D??E??????FGH\x80I??", "?", "\x1a", -1)
	transformer := encoding.ReplaceUnsupported(charmap.Windows1252.NewEncoder())
	gotBuf := make([]byte, 0, 1024)
	src := make([]byte, 0, 1024)
	for i, input := range inputs {
		dst := make([]byte, 1024)
		src = append(src, input...)
		atEOF := i == len(inputs)-1
		nDst, nSrc, err := transformer.Transform(dst, src, atEOF)
		gotBuf = append(gotBuf, dst[:nDst]...)
		// Carry any unconsumed bytes (e.g. a partial rune) into the next call.
		src = src[nSrc:]
		// ErrShortSrc is expected mid-stream when a rune is incomplete.
		if err != nil && err != transform.ErrShortSrc {
			t.Fatalf("i=%d: %v", i, err)
		}
		// At EOF no error is acceptable: nothing more is coming.
		if atEOF && err != nil {
			t.Fatalf("i=%d: atEOF: %v", i, err)
		}
	}
	if got := string(gotBuf); got != want {
		t.Fatalf("\ngot %+q\nwant %+q", got, want)
	}
}
// TestReplacement exercises encoding.Replacement in both directions on
// the same input: the decoder's output is "\ufffd", while the encoder
// passes valid UTF-8 through and substitutes U+FFFD for invalid bytes.
func TestReplacement(t *testing.T) {
	for _, direction := range []string{"Decode", "Encode"} {
		var tr transform.Transformer
		var want string
		if direction == "Decode" {
			tr = encoding.Replacement.NewDecoder()
			want = "\ufffd"
		} else {
			tr = encoding.Replacement.NewEncoder()
			want = "AB\x00CD\ufffdYZ"
		}
		sr := strings.NewReader("AB\x00CD\x80YZ")
		out, err := ioutil.ReadAll(transform.NewReader(sr, tr))
		if err != nil {
			t.Errorf("%s: ReadAll: %v", direction, err)
			continue
		}
		if got := string(out); got != want {
			t.Errorf("%s:\ngot %q\nwant %q", direction, got, want)
		}
	}
}
// TestUTF8Validator exercises encoding.UTF8Validator.Transform across
// valid input, exactly-sized and too-small destination buffers, runes
// truncated at a non-EOF boundary, and several classes of invalid UTF-8
// (stray continuation bytes, sequences above U+10FFFF, surrogate halves).
func TestUTF8Validator(t *testing.T) {
	testCases := []struct {
		desc    string // test case description
		dstSize int    // size of the destination buffer given to Transform
		src     string // input bytes
		atEOF   bool   // whether src is the end of the stream
		want    string // expected output
		wantErr error  // expected error
	}{
		{
			"empty input",
			100,
			"",
			false,
			"",
			nil,
		},
		{
			"valid 1-byte 1-rune input",
			100,
			"a",
			false,
			"a",
			nil,
		},
		{
			"valid 3-byte 1-rune input",
			100,
			"\u1234",
			false,
			"\u1234",
			nil,
		},
		{
			"valid 5-byte 3-rune input",
			100,
			"a\u0100\u0101",
			false,
			"a\u0100\u0101",
			nil,
		},
		{
			"perfectly sized dst (non-ASCII)",
			5,
			"a\u0100\u0101",
			false,
			"a\u0100\u0101",
			nil,
		},
		{
			"short dst (non-ASCII)",
			4,
			"a\u0100\u0101",
			false,
			"a\u0100",
			transform.ErrShortDst,
		},
		{
			"perfectly sized dst (ASCII)",
			5,
			"abcde",
			false,
			"abcde",
			nil,
		},
		{
			"short dst (ASCII)",
			4,
			"abcde",
			false,
			"abcd",
			transform.ErrShortDst,
		},
		{
			// A truncated rune before EOF is recoverable: more bytes may come.
			"partial input (!EOF)",
			100,
			"a\u0100\xf1",
			false,
			"a\u0100",
			transform.ErrShortSrc,
		},
		{
			// The same truncated rune at EOF is a hard error.
			"invalid input (EOF)",
			100,
			"a\u0100\xf1",
			true,
			"a\u0100",
			encoding.ErrInvalidUTF8,
		},
		{
			"invalid input (!EOF)",
			100,
			"a\u0100\x80",
			false,
			"a\u0100",
			encoding.ErrInvalidUTF8,
		},
		{
			"invalid input (above U+10FFFF)",
			100,
			"a\u0100\xf7\xbf\xbf\xbf",
			false,
			"a\u0100",
			encoding.ErrInvalidUTF8,
		},
		{
			"invalid input (surrogate half)",
			100,
			"a\u0100\xed\xa0\x80",
			false,
			"a\u0100",
			encoding.ErrInvalidUTF8,
		},
	}
	for _, tc := range testCases {
		dst := make([]byte, tc.dstSize)
		nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF)
		if nDst < 0 || len(dst) < nDst {
			t.Errorf("%s: nDst=%d out of range", tc.desc, nDst)
			continue
		}
		got := string(dst[:nDst])
		// The validator copies its input, so nSrc must equal the number
		// of bytes written out.
		if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr {
			t.Errorf("%s:\ngot %+q, %d, %v\nwant %+q, %d, %v",
				tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr)
			continue
		}
	}
}
// TestErrorHandler verifies the ReplaceUnsupported and
// HTMLEscapeUnsupported encoder wrappers against a Windows-1250 encoder,
// including their behavior when the destination buffer is too small to
// hold the substitution.
func TestErrorHandler(t *testing.T) {
	testCases := []struct {
		desc      string                                      // test case description
		handler   func(*encoding.Encoder) *encoding.Encoder   // wrapper under test
		sizeDst   int                                         // destination buffer size
		src, want string                                      // input and expected output
		nSrc      int                                         // expected bytes consumed
		err       error                                       // expected error
	}{
		{
			desc:    "one rune replacement",
			handler: encoding.ReplaceUnsupported,
			sizeDst: 100,
			src:     "\uAC00",
			want:    "\x1a",
			nSrc:    3,
		},
		{
			desc:    "mid-stream rune replacement",
			handler: encoding.ReplaceUnsupported,
			sizeDst: 100,
			src:     "a\uAC00bcd\u00e9",
			want:    "a\x1abcd\xe9",
			nSrc:    9,
		},
		{
			desc:    "at end rune replacement",
			handler: encoding.ReplaceUnsupported,
			sizeDst: 10,
			src:     "\u00e9\uAC00",
			want:    "\xe9\x1a",
			nSrc:    5,
		},
		{
			desc:    "short buffer replacement",
			handler: encoding.ReplaceUnsupported,
			sizeDst: 1,
			src:     "\u00e9\uAC00",
			want:    "\xe9",
			nSrc:    2,
			err:     transform.ErrShortDst,
		},
		{
			desc:    "one rune html escape",
			handler: encoding.HTMLEscapeUnsupported,
			sizeDst: 100,
			src:     "\uAC00",
			want:    "&#44032;",
			nSrc:    3,
		},
		{
			desc:    "mid-stream html escape",
			handler: encoding.HTMLEscapeUnsupported,
			sizeDst: 100,
			src:     "\u00e9\uAC00dcba",
			want:    "\xe9&#44032;dcba",
			nSrc:    9,
		},
		{
			// The 8-byte escape "&#44033;" does not fit in the 7 bytes
			// remaining after "ab", so only "ab" is emitted.
			desc:    "short buffer html escape",
			handler: encoding.HTMLEscapeUnsupported,
			sizeDst: 9,
			src:     "ab\uAC01",
			want:    "ab",
			nSrc:    2,
			err:     transform.ErrShortDst,
		},
	}
	for i, tc := range testCases {
		tr := tc.handler(charmap.Windows1250.NewEncoder())
		b := make([]byte, tc.sizeDst)
		nDst, nSrc, err := tr.Transform(b, []byte(tc.src), true)
		if err != tc.err {
			t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
		}
		if got := string(b[:nDst]); got != tc.want {
			t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
		}
		if nSrc != tc.nSrc {
			t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
		}
	}
}

View file

@ -1,42 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package encoding_test
import (
"fmt"
"io"
"os"
"strings"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/transform"
)
// ExampleDecodeWindows1252 decodes a Windows-1252 byte stream to UTF-8
// by wrapping the source reader with the charmap's decoder.
func ExampleDecodeWindows1252() {
	src := strings.NewReader("Gar\xe7on !")
	io.Copy(os.Stdout, charmap.Windows1252.NewDecoder().Reader(src))
	// Output: Garçon !
}
// ExampleUTF8Validator encodes the same invalid UTF-8 input twice: once
// with a plain UTF-16 encoder, and once with UTF8Validator chained in
// front, which stops at the first invalid byte instead.
func ExampleUTF8Validator() {
	src := []byte("abc\xffxyz") // src is invalid UTF-8.
	for i := 0; i < 2; i++ {
		var transformer transform.Transformer = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder()
		if i == 1 {
			transformer = transform.Chain(encoding.UTF8Validator, transformer)
		}
		dst := make([]byte, 256)
		nDst, nSrc, err := transformer.Transform(dst, src, true)
		fmt.Printf("i=%d: produced %q, consumed %q, error %v\n", i, dst[:nDst], src[:nSrc], err)
	}
	// Output:
	// i=0: produced "\x00a\x00b\x00c\xff\xfd\x00x\x00y\x00z", consumed "abc\xffxyz", error <nil>
	// i=1: produced "\x00a\x00b\x00c", consumed "abc", error encoding: invalid UTF-8
}

View file

@ -1,144 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package htmlindex
import (
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/internal/identifier"
"golang.org/x/text/encoding/unicode"
"golang.org/x/text/language"
)
// TestGet checks name resolution in Get: surrounding ASCII whitespace is
// ignored, aliases resolve to their canonical names, and malformed names
// yield errInvalidName.
func TestGet(t *testing.T) {
	testCases := []struct {
		name      string
		canonical string
		err       error
	}{
		{"utf-8", "utf-8", nil},
		{" utf-8 ", "utf-8", nil},
		{" l5 ", "windows-1254", nil},
		{"latin5 ", "windows-1254", nil},
		{"latin 5", "", errInvalidName},
		{"latin-5", "", errInvalidName},
	}
	for i, tc := range testCases {
		enc, err := Get(tc.name)
		if err != tc.err {
			t.Errorf("%d: error was %v; want %v", i, err, tc.err)
		}
		if err != nil {
			continue
		}
		// Round-trip: the resolved encoding must report the canonical name.
		if got, err := Name(enc); got != tc.canonical {
			t.Errorf("%d: Name(Get(%q)) = %q; want %q (%v)", i, tc.name, got, tc.canonical, err)
		}
	}
}
// TestTables cross-checks the generated tables against each other: every
// name in nameMap must resolve via Get to the encoding stored in
// encodings, and that encoding's MIB identifier must map back to the
// same index in mibMap.
func TestTables(t *testing.T) {
	for name, index := range nameMap {
		got, err := Get(name)
		if err != nil {
			// Fixed message: the old text read "expected non-nil error"
			// even though this branch fires when an error unexpectedly occurs.
			t.Errorf("%s:err: unexpected error: %v", name, err)
		}
		if want := encodings[index]; got != want {
			t.Errorf("%s:encoding: got %v; want %v", name, got, want)
		}
		mib, _ := got.(identifier.Interface).ID()
		if mibMap[mib] != index {
			// Fixed "mibMab" typo in the message.
			t.Errorf("%s:mibMap: got %d; want %d", name, mibMap[mib], index)
		}
	}
}
// TestName checks Name for encodings that are defined in the WHATWG HTML
// standard, ones that exist but are not part of the standard (expected to
// return errUnsupported), and encodings unknown to this package
// (errUnknown).
func TestName(t *testing.T) {
	for i, tc := range []struct {
		desc string            // test case description
		enc  encoding.Encoding // encoding under test
		name string            // expected canonical name ("" on error)
		err  error             // expected error
	}{{
		"defined encoding",
		charmap.ISO8859_2,
		"iso-8859-2",
		nil,
	}, {
		"defined Unicode encoding",
		unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
		"utf-16be",
		nil,
	}, {
		"undefined Unicode encoding in HTML standard",
		unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
		"",
		errUnsupported,
	}, {
		"undefined other encoding in HTML standard",
		charmap.CodePage437,
		"",
		errUnsupported,
	}, {
		"unknown encoding",
		encoding.Nop,
		"",
		errUnknown,
	}} {
		name, err := Name(tc.enc)
		if name != tc.name || err != tc.err {
			t.Errorf("%d:%s: got %q, %v; want %q, %v", i, tc.desc, name, err, tc.name, tc.err)
		}
	}
}
// TestLanguageDefault checks the per-language default encodings returned
// by LanguageDefault, including matching of close language variants and
// the windows-1252 fallback for languages without an entry.
func TestLanguageDefault(t *testing.T) {
	for _, tc := range []struct{ tag, want string }{
		{"und", "windows-1252"}, // The default value.
		{"ar", "windows-1256"},
		{"ba", "windows-1251"},
		{"be", "windows-1251"},
		{"bg", "windows-1251"},
		{"cs", "windows-1250"},
		{"el", "iso-8859-7"},
		{"et", "windows-1257"},
		{"fa", "windows-1256"},
		{"he", "windows-1255"},
		{"hr", "windows-1250"},
		{"hu", "iso-8859-2"},
		{"ja", "shift_jis"},
		{"kk", "windows-1251"},
		{"ko", "euc-kr"},
		{"ku", "windows-1254"},
		{"ky", "windows-1251"},
		{"lt", "windows-1257"},
		{"lv", "windows-1257"},
		{"mk", "windows-1251"},
		{"pl", "iso-8859-2"},
		{"ru", "windows-1251"},
		{"sah", "windows-1251"},
		{"sk", "windows-1250"},
		{"sl", "iso-8859-2"},
		{"sr", "windows-1251"},
		{"tg", "windows-1251"},
		{"th", "windows-874"},
		{"tr", "windows-1254"},
		{"tt", "windows-1251"},
		{"uk", "windows-1251"},
		{"vi", "windows-1258"},
		{"zh-hans", "gb18030"},
		{"zh-hant", "big5"},
		// Variants and close approximates of the above.
		{"ar_EG", "windows-1256"},
		{"bs", "windows-1250"}, // Bosnian Latin maps to Croatian.
		// Use default fallback in case of miss.
		{"nl", "windows-1252"},
	} {
		if got := LanguageDefault(language.MustParse(tc.tag)); got != tc.want {
			t.Errorf("LanguageDefault(%s) = %s; want %s", tc.tag, got, tc.want)
		}
	}
}

View file

@ -1,248 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package japanese
import (
"fmt"
"strings"
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/enctest"
"golang.org/x/text/transform"
)
// dec returns the "Decode" direction label, e's decoder, and the error
// the tests expect from a decode pass (none).
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir, t = "Decode", e.NewDecoder()
	return dir, t, nil
}
// enc returns the "Encode" direction label, e's encoder, and the error
// the tests expect when the source contains unencodable runes.
func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir, t = "Encode", e.NewEncoder()
	return dir, t, internal.ErrASCIIReplacement
}
// TestNonRepertoire checks handling of bytes and runes outside the
// repertoires of EUC-JP, ISO-2022-JP and Shift JIS: encoders stop at the
// first unencodable rune, and decoders substitute U+FFFD and recover.
// Each case is replayed with every possible split point of the source to
// exercise partial input across Transform calls.
func TestNonRepertoire(t *testing.T) {
	// Pick n to cause the destination buffer in transform.String to overflow.
	const n = 100
	long := strings.Repeat(".", n)
	testCases := []struct {
		init      func(e encoding.Encoding) (string, transform.Transformer, error)
		e         encoding.Encoding
		src, want string
	}{
		{enc, EUCJP, "갂", ""},
		{enc, EUCJP, "a갂", "a"},
		{enc, EUCJP, "丌갂", "\x8f\xb0\xa4"},

		{enc, ISO2022JP, "갂", ""},
		{enc, ISO2022JP, "a갂", "a"},
		{enc, ISO2022JP, "朗갂", "\x1b$BzF\x1b(B"}, // switch back to ASCII mode at end

		{enc, ShiftJIS, "갂", ""},
		{enc, ShiftJIS, "a갂", "a"},
		{enc, ShiftJIS, "\u2190갂", "\x81\xa9"},

		// Continue correctly after errors
		{dec, EUCJP, "\x8e\xa0", "\ufffd\ufffd"},
		{dec, EUCJP, "\x8e\xe0", "\ufffd"},
		{dec, EUCJP, "\x8e\xff", "\ufffd\ufffd"},
		{dec, EUCJP, "\x8ea", "\ufffda"},
		{dec, EUCJP, "\x8f\xa0", "\ufffd\ufffd"},
		{dec, EUCJP, "\x8f\xa1\xa0", "\ufffd\ufffd"},
		{dec, EUCJP, "\x8f\xa1a", "\ufffda"},
		{dec, EUCJP, "\x8f\xa1a", "\ufffda"},
		{dec, EUCJP, "\x8f\xa1a", "\ufffda"},
		{dec, EUCJP, "\x8f\xa2\xa2", "\ufffd"},
		{dec, EUCJP, "\xfe", "\ufffd"},
		{dec, EUCJP, "\xfe\xfc", "\ufffd"},
		{dec, EUCJP, "\xfe\xff", "\ufffd\ufffd"},
		// Correct handling of end of source
		{dec, EUCJP, strings.Repeat("\x8e", n), strings.Repeat("\ufffd", n)},
		{dec, EUCJP, strings.Repeat("\x8f", n), strings.Repeat("\ufffd", n)},
		{dec, EUCJP, strings.Repeat("\x8f\xa0", n), strings.Repeat("\ufffd", 2*n)},
		{dec, EUCJP, "a" + strings.Repeat("\x8f\xa1", n), "a" + strings.Repeat("\ufffd", n)},
		{dec, EUCJP, "a" + strings.Repeat("\x8f\xa1\xff", n), "a" + strings.Repeat("\ufffd", 2*n)},

		// Continue correctly after errors
		{dec, ShiftJIS, "\x80", "\u0080"}, // It's what the spec says.
		{dec, ShiftJIS, "\x81", "\ufffd"},
		{dec, ShiftJIS, "\x81\x7f", "\ufffd\u007f"},
		{dec, ShiftJIS, "\xe0", "\ufffd"},
		{dec, ShiftJIS, "\xe0\x39", "\ufffd\u0039"},
		{dec, ShiftJIS, "\xe0\x9f", "燹"},
		{dec, ShiftJIS, "\xe0\xfd", "\ufffd"},
		{dec, ShiftJIS, "\xef\xfc", "\ufffd"},
		{dec, ShiftJIS, "\xfc\xfc", "\ufffd"},
		{dec, ShiftJIS, "\xfc\xfd", "\ufffd"},
		{dec, ShiftJIS, "\xfdaa", "\ufffdaa"},

		{dec, ShiftJIS, strings.Repeat("\x81\x81", n), strings.Repeat("", n)},
		{dec, ShiftJIS, strings.Repeat("\xe0\xfd", n), strings.Repeat("\ufffd", n)},
		{dec, ShiftJIS, "a" + strings.Repeat("\xe0\xfd", n), "a" + strings.Repeat("\ufffd", n)},

		{dec, ISO2022JP, "\x1b$", "\ufffd$"},
		{dec, ISO2022JP, "\x1b(", "\ufffd("},
		{dec, ISO2022JP, "\x1b@", "\ufffd@"},
		{dec, ISO2022JP, "\x1bZ", "\ufffdZ"},
		// incomplete escapes
		{dec, ISO2022JP, "\x1b$", "\ufffd$"},
		{dec, ISO2022JP, "\x1b$J.", "\ufffd$J."},            // illegal
		{dec, ISO2022JP, "\x1b$B.", "\ufffd"},               // JIS208
		{dec, ISO2022JP, "\x1b$(", "\ufffd$("},              // JIS212
		{dec, ISO2022JP, "\x1b$(..", "\ufffd$(.."},          // JIS212
		{dec, ISO2022JP, "\x1b$(" + long, "\ufffd$(" + long}, // JIS212
		{dec, ISO2022JP, "\x1b$(D.", "\ufffd"},              // JIS212
		{dec, ISO2022JP, "\x1b$(D..", "\ufffd"},             // JIS212
		{dec, ISO2022JP, "\x1b$(D...", "\ufffd\ufffd"},      // JIS212
		{dec, ISO2022JP, "\x1b(B.", "."},                    // ascii
		{dec, ISO2022JP, "\x1b(B..", ".."},                  // ascii
		{dec, ISO2022JP, "\x1b(J.", "."},                    // roman
		{dec, ISO2022JP, "\x1b(J..", ".."},                  // roman
		{dec, ISO2022JP, "\x1b(I\x20", "\ufffd"},            // katakana
		{dec, ISO2022JP, "\x1b(I\x20\x20", "\ufffd\ufffd"},  // katakana
		// recover to same state
		{dec, ISO2022JP, "\x1b(B\x1b.", "\ufffd."},
		{dec, ISO2022JP, "\x1b(I\x1b.", "\ufffdョ"},
		{dec, ISO2022JP, "\x1b(I\x1b$.", "\ufffd、ョ"},
		{dec, ISO2022JP, "\x1b(I\x1b(.", "\ufffdィョ"},
		{dec, ISO2022JP, "\x1b$B\x7e\x7e", "\ufffd"},
		{dec, ISO2022JP, "\x1b$@\x0a.", "\x0a."},
		{dec, ISO2022JP, "\x1b$B\x0a.", "\x0a."},
		{dec, ISO2022JP, "\x1b$(D\x0a.", "\x0a."},
		{dec, ISO2022JP, "\x1b$(D\x7e\x7e", "\ufffd"},
		{dec, ISO2022JP, "\x80", "\ufffd"},

		// TODO: according to https://encoding.spec.whatwg.org/#iso-2022-jp,
		// these should all be correct.
		// {dec, ISO2022JP, "\x1b(B\x0E", "\ufffd"},
		// {dec, ISO2022JP, "\x1b(B\x0F", "\ufffd"},
		{dec, ISO2022JP, "\x1b(B\x5C", "\u005C"},
		{dec, ISO2022JP, "\x1b(B\x7E", "\u007E"},
		// {dec, ISO2022JP, "\x1b(J\x0E", "\ufffd"},
		// {dec, ISO2022JP, "\x1b(J\x0F", "\ufffd"},
		// {dec, ISO2022JP, "\x1b(J\x5C", "\u00A5"},
		// {dec, ISO2022JP, "\x1b(J\x7E", "\u203E"},
	}
	for _, tc := range testCases {
		dir, tr, wantErr := tc.init(tc.e)
		t.Run(fmt.Sprintf("%s/%v/%q", dir, tc.e, tc.src), func(t *testing.T) {
			dst := make([]byte, 100000)
			src := []byte(tc.src)
			// Split the source at every position i to exercise resumption.
			for i := 0; i <= len(tc.src); i++ {
				nDst, nSrc, err := tr.Transform(dst, src[:i], false)
				if err != nil && err != transform.ErrShortSrc && err != wantErr {
					t.Fatalf("error on first call to Transform: %v", err)
				}
				n, _, err := tr.Transform(dst[nDst:], src[nSrc:], true)
				nDst += n
				if err != wantErr {
					t.Fatalf("(%q|%q): got %v; want %v", tc.src[:i], tc.src[i:], err, wantErr)
				}
				if got := string(dst[:nDst]); got != tc.want {
					t.Errorf("(%q|%q):\ngot %q\nwant %q", tc.src[:i], tc.src[i:], got, tc.want)
				}
			}
		})
	}
}
// TestCorrect spot-checks specific Shift JIS decode and encode mappings,
// including characters whose encoded form after a round trip differs
// from the bytes they were decoded from.
func TestCorrect(t *testing.T) {
	for _, tc := range []struct {
		init      func(e encoding.Encoding) (string, transform.Transformer, error)
		e         encoding.Encoding
		src, want string
	}{
		{dec, ShiftJIS, "\x9f\xfc", "滌"},
		{dec, ShiftJIS, "\xfb\xfc", "髙"},
		{dec, ShiftJIS, "\xfa\xb1", "﨑"},
		{enc, ShiftJIS, "滌", "\x9f\xfc"},
		{enc, ShiftJIS, "﨑", "\xed\x95"},
	} {
		dir, tr, _ := tc.init(tc.e)
		got, _, err := transform.String(tr, tc.src)
		if err != nil {
			t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, nil)
		}
		if got != tc.want {
			t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
		}
	}
}
// TestBasics runs the shared round-trip encoding test for EUC-JP,
// ISO-2022-JP and Shift JIS against known-good encoded/UTF-8 pairs.
func TestBasics(t *testing.T) {
	// The encoded forms can be verified by the iconv program:
	// $ echo 月日は百代 | iconv -f UTF-8 -t SHIFT-JIS | xxd
	testCases := []struct {
		e         encoding.Encoding // encoding under test
		encPrefix string            // bytes the encoder must emit before the payload
		encSuffix string            // bytes the encoder must emit after the payload
		encoded   string
		utf8      string
	}{{
		// "A。カ゚ 0208: etc 0212: etc" is a nonsense string that contains ASCII, half-width
		// kana, JIS X 0208 (including two near the kink in the Shift JIS second byte
		// encoding) and JIS X 0212 encodable codepoints.
		//
		// "月日は百代の過客にして、行かふ年も又旅人也。" is from the 17th century poem
		// "Oku no Hosomichi" and contains both hiragana and kanji.
		e: EUCJP,
		encoded: "A\x8e\xa1\x8e\xb6\x8e\xdf " +
			"0208: \xa1\xa1\xa1\xa2\xa1\xdf\xa1\xe0\xa1\xfd\xa1\xfe\xa2\xa1\xa2\xa2\xf4\xa6 " +
			"0212: \x8f\xa2\xaf\x8f\xed\xe3",
		utf8: "A。カ゚ " +
			"0208: \u3000\u3001\u00d7\u00f7\u25ce\u25c7\u25c6\u25a1\u7199 " +
			"0212: \u02d8\u9fa5",
	}, {
		e: EUCJP,
		encoded: "\xb7\xee\xc6\xfc\xa4\xcf\xc9\xb4\xc2\xe5\xa4\xce\xb2\xe1\xb5\xd2" +
			"\xa4\xcb\xa4\xb7\xa4\xc6\xa1\xa2\xb9\xd4\xa4\xab\xa4\xd5\xc7\xaf" +
			"\xa4\xe2\xcb\xf4\xce\xb9\xbf\xcd\xcc\xe9\xa1\xa3",
		utf8: "月日は百代の過客にして、行かふ年も又旅人也。",
	}, {
		e:         ISO2022JP,
		encSuffix: "\x1b\x28\x42",
		encoded: "\x1b\x28\x49\x21\x36\x5f\x1b\x28\x42 " +
			"0208: \x1b\x24\x42\x21\x21\x21\x22\x21\x5f\x21\x60\x21\x7d\x21\x7e\x22\x21\x22\x22\x74\x26",
		utf8: "。カ゚ " +
			"0208: \u3000\u3001\u00d7\u00f7\u25ce\u25c7\u25c6\u25a1\u7199",
	}, {
		e:         ISO2022JP,
		encPrefix: "\x1b\x24\x42",
		encSuffix: "\x1b\x28\x42",
		encoded: "\x37\x6e\x46\x7c\x24\x4f\x49\x34\x42\x65\x24\x4e\x32\x61\x35\x52" +
			"\x24\x4b\x24\x37\x24\x46\x21\x22\x39\x54\x24\x2b\x24\x55\x47\x2f" +
			"\x24\x62\x4b\x74\x4e\x39\x3f\x4d\x4c\x69\x21\x23",
		utf8: "月日は百代の過客にして、行かふ年も又旅人也。",
	}, {
		e: ShiftJIS,
		encoded: "A\xa1\xb6\xdf " +
			"0208: \x81\x40\x81\x41\x81\x7e\x81\x80\x81\x9d\x81\x9e\x81\x9f\x81\xa0\xea\xa4",
		utf8: "A。カ゚ " +
			"0208: \u3000\u3001\u00d7\u00f7\u25ce\u25c7\u25c6\u25a1\u7199",
	}, {
		e: ShiftJIS,
		encoded: "\x8c\x8e\x93\xfa\x82\xcd\x95\x53\x91\xe3\x82\xcc\x89\xdf\x8b\x71" +
			"\x82\xc9\x82\xb5\x82\xc4\x81\x41\x8d\x73\x82\xa9\x82\xd3\x94\x4e" +
			"\x82\xe0\x96\x94\x97\xb7\x90\x6c\x96\xe7\x81\x42",
		utf8: "月日は百代の過客にして、行かふ年も又旅人也。",
	}}
	for _, tc := range testCases {
		enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, tc.encSuffix)
	}
}
func TestFiles(t *testing.T) {
enctest.TestFile(t, EUCJP)
enctest.TestFile(t, ISO2022JP)
enctest.TestFile(t, ShiftJIS)
}
func BenchmarkEncoding(b *testing.B) {
enctest.Benchmark(b, EUCJP)
enctest.Benchmark(b, ISO2022JP)
enctest.Benchmark(b, ShiftJIS)
}

View file

@ -1,94 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package korean
import (
"strings"
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/enctest"
"golang.org/x/text/transform"
)
// dec returns the "Decode" direction label, e's decoder, and the error
// the tests expect from a decode pass (none).
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir, t = "Decode", e.NewDecoder()
	return dir, t, nil
}
// enc returns the "Encode" direction label, e's encoder, and the error
// the tests expect when the source contains unencodable runes.
func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir, t = "Encode", e.NewEncoder()
	return dir, t, internal.ErrASCIIReplacement
}
// TestNonRepertoire checks EUC-KR handling of bytes and runes outside its
// repertoire: the encoder stops at the first unencodable rune, and the
// decoder substitutes U+FFFD and continues.
func TestNonRepertoire(t *testing.T) {
	// Pick n large enough to cause an overflow in the destination buffer of
	// transform.String.
	const n = 10000
	testCases := []struct {
		init      func(e encoding.Encoding) (string, transform.Transformer, error)
		e         encoding.Encoding
		src, want string
	}{
		{dec, EUCKR, "\xfe\xfe", "\ufffd"},
		// {dec, EUCKR, "א", "\ufffd"}, // TODO: why is this different?

		{enc, EUCKR, "א", ""},
		{enc, EUCKR, "aא", "a"},
		{enc, EUCKR, "\uac00א", "\xb0\xa1"},
		// TODO: should we also handle Jamo?

		{dec, EUCKR, "\x80", "\ufffd"},
		{dec, EUCKR, "\xff", "\ufffd"},
		{dec, EUCKR, "\x81", "\ufffd"},
		{dec, EUCKR, "\xb0\x40", "\ufffd@"},
		{dec, EUCKR, "\xb0\xff", "\ufffd"},
		{dec, EUCKR, "\xd0\x20", "\ufffd "},
		{dec, EUCKR, "\xd0\xff", "\ufffd"},

		{dec, EUCKR, strings.Repeat("\x81", n), strings.Repeat("걖", n/2)},
	}
	for _, tc := range testCases {
		dir, tr, wantErr := tc.init(tc.e)

		dst, _, err := transform.String(tr, tc.src)
		if err != wantErr {
			t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
		}
		if got := string(dst); got != tc.want {
			t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
		}
	}
}
// TestBasics runs the shared round-trip encoding test for EUC-KR against
// known-good encoded/UTF-8 pairs.
func TestBasics(t *testing.T) {
	// The encoded forms can be verified by the iconv program:
	// $ echo 月日は百代 | iconv -f UTF-8 -t SHIFT-JIS | xxd
	testCases := []struct {
		e       encoding.Encoding
		encoded string
		utf8    string
	}{{
		// Korean tests.
		//
		// "A\uac02\uac35\uac56\ud401B\ud408\ud620\ud624C\u4f3d\u8a70D" is a
		// nonsense string that contains ASCII, Hangul and CJK ideographs.
		//
		// "세계야, 안녕" translates as "Hello, world".
		e:       EUCKR,
		encoded: "A\x81\x41\x81\x61\x81\x81\xc6\xfeB\xc7\xa1\xc7\xfe\xc8\xa1C\xca\xa1\xfd\xfeD",
		utf8:    "A\uac02\uac35\uac56\ud401B\ud408\ud620\ud624C\u4f3d\u8a70D",
	}, {
		e:       EUCKR,
		encoded: "\xbc\xbc\xb0\xe8\xbe\xdf\x2c\x20\xbe\xc8\xb3\xe7",
		utf8:    "세계야, 안녕",
	}}
	for _, tc := range testCases {
		enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
	}
}
// TestFiles runs the shared file-based round-trip test for EUC-KR.
func TestFiles(t *testing.T) {
	enctest.TestFile(t, EUCKR)
}
// BenchmarkEncoding runs the shared encoding benchmark for EUC-KR.
func BenchmarkEncoding(b *testing.B) {
	enctest.Benchmark(b, EUCKR)
}

View file

@ -1,143 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package simplifiedchinese
import (
"strings"
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/enctest"
"golang.org/x/text/transform"
)
// dec returns the "Decode" direction label, e's decoder, and the error
// the tests expect from a decode pass (none).
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir, t = "Decode", e.NewDecoder()
	return dir, t, nil
}
// enc returns the "Encode" direction label, e's encoder, and the error
// the tests expect when the source contains unencodable runes.
func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir, t = "Encode", e.NewEncoder()
	return dir, t, internal.ErrASCIIReplacement
}
// TestNonRepertoire checks GBK, HZ-GB2312 and GB18030 handling of input
// outside their repertoires: encoders stop at the first unencodable rune,
// and decoders substitute U+FFFD and continue.
func TestNonRepertoire(t *testing.T) {
	// Pick n large enough to overflow the destination buffer of transform.String.
	const n = 10000
	testCases := []struct {
		init      func(e encoding.Encoding) (string, transform.Transformer, error)
		e         encoding.Encoding
		src, want string
	}{
		{dec, GBK, "a\xfe\xfeb", "a\ufffdb"},
		{dec, HZGB2312, "~{z~", "\ufffd"},

		{enc, GBK, "갂", ""},
		{enc, GBK, "a갂", "a"},
		{enc, GBK, "\u4e02갂", "\x81@"},

		{enc, HZGB2312, "갂", ""},
		{enc, HZGB2312, "a갂", "a"},
		{enc, HZGB2312, "\u6cf5갂", "~{1C~}"},

		{dec, GB18030, "\x80", "€"},
		{dec, GB18030, "\x81", "\ufffd"},
		{dec, GB18030, "\x81\x20", "\ufffd "},
		{dec, GB18030, "\xfe\xfe", "\ufffd"},
		{dec, GB18030, "\xfe\xff", "\ufffd\ufffd"},
		{dec, GB18030, "\xfe\x30", "\ufffd0"},
		{dec, GB18030, "\xfe\x30\x30 ", "\ufffd00 "},
		{dec, GB18030, "\xfe\x30\xff ", "\ufffd0\ufffd "},
		{dec, GB18030, "\xfe\x30\x81\x21", "\ufffd0\ufffd!"},

		{dec, GB18030, strings.Repeat("\xfe\x30", n), strings.Repeat("\ufffd0", n)},

		{dec, HZGB2312, "~/", "\ufffd"},
		{dec, HZGB2312, "~{a\x80", "\ufffd"},
		{dec, HZGB2312, "~{a\x80", "\ufffd"},
		{dec, HZGB2312, "~{" + strings.Repeat("z~", n), strings.Repeat("\ufffd", n)},
		{dec, HZGB2312, "~{" + strings.Repeat("\xfe\x30", n), strings.Repeat("\ufffd", n*2)},
	}
	for _, tc := range testCases {
		dir, tr, wantErr := tc.init(tc.e)

		dst, _, err := transform.String(tr, tc.src)
		if err != wantErr {
			t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
		}
		if got := string(dst); got != tc.want {
			t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
		}
	}
}
// TestBasics runs the shared round-trip encoding test for GB18030, GBK
// and HZ-GB2312 against known-good encoded/UTF-8 pairs.
func TestBasics(t *testing.T) {
	// The encoded forms can be verified by the iconv program:
	// $ echo 月日は百代 | iconv -f UTF-8 -t SHIFT-JIS | xxd
	testCases := []struct {
		e         encoding.Encoding // encoding under test
		encPrefix string            // bytes the encoder must emit before the payload
		encoded   string
		utf8      string
	}{{
		// "\u0081\u00de\u00df\u00e0\u00e1\u00e2\u00e3\uffff\U00010000" is a
		// nonsense string that contains GB18030 encodable codepoints of which
		// only U+00E0 and U+00E1 are GBK encodable.
		//
		// "A\u3000\u554a\u4e02\u4e90\u72dc\u7349\u02ca\u2588Z€" is a nonsense
		// string that contains ASCII and GBK encodable codepoints from Levels
		// 1-5 as well as the Euro sign.
		//
		// "A\u43f0\u4c32\U00027267\u3000\U0002910d\u79d4Z€" is a nonsense string
		// that contains ASCII and Big5 encodable codepoints from the Basic
		// Multilingual Plane and the Supplementary Ideographic Plane as well as
		// the Euro sign.
		//
		// "花间一壶酒,独酌无相亲。" (simplified) and
		// "花間一壺酒,獨酌無相親。" (traditional)
		// are from the 8th century poem "Yuè Xià Dú Zhuó".
		e: GB18030,
		encoded: "\x81\x30\x81\x31\x81\x30\x89\x37\x81\x30\x89\x38\xa8\xa4\xa8\xa2" +
			"\x81\x30\x89\x39\x81\x30\x8a\x30\x84\x31\xa4\x39\x90\x30\x81\x30",
		utf8: "\u0081\u00de\u00df\u00e0\u00e1\u00e2\u00e3\uffff\U00010000",
	}, {
		e: GB18030,
		encoded: "\xbb\xa8\xbc\xe4\xd2\xbb\xba\xf8\xbe\xc6\xa3\xac\xb6\xc0\xd7\xc3" +
			"\xce\xde\xcf\xe0\xc7\xd7\xa1\xa3",
		utf8: "花间一壶酒,独酌无相亲。",
	}, {
		e:       GBK,
		encoded: "A\xa1\xa1\xb0\xa1\x81\x40\x81\x80\xaa\x40\xaa\x80\xa8\x40\xa8\x80Z\x80",
		utf8:    "A\u3000\u554a\u4e02\u4e90\u72dc\u7349\u02ca\u2588Z€",
	}, {
		e: GBK,
		encoded: "\xbb\xa8\xbc\xe4\xd2\xbb\xba\xf8\xbe\xc6\xa3\xac\xb6\xc0\xd7\xc3" +
			"\xce\xde\xcf\xe0\xc7\xd7\xa1\xa3",
		utf8: "花间一壶酒,独酌无相亲。",
	}, {
		e:       HZGB2312,
		encoded: "A~{\x21\x21~~\x30\x21~}Z~~",
		utf8:    "A\u3000~\u554aZ~",
	}, {
		e:         HZGB2312,
		encPrefix: "~{",
		encoded:   ";(<dR;:x>F#,6@WCN^O`GW!#",
		utf8:      "花间一壶酒,独酌无相亲。",
	}}
	for _, tc := range testCases {
		enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, "")
	}
}
func TestFiles(t *testing.T) {
enctest.TestFile(t, GB18030)
enctest.TestFile(t, GBK)
enctest.TestFile(t, HZGB2312)
}
func BenchmarkEncoding(b *testing.B) {
enctest.Benchmark(b, GB18030)
enctest.Benchmark(b, GBK)
enctest.Benchmark(b, HZGB2312)
}

View file

@ -1,114 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package traditionalchinese
import (
"fmt"
"io/ioutil"
"strings"
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/internal"
"golang.org/x/text/encoding/internal/enctest"
"golang.org/x/text/transform"
)
// dec returns the "Decode" direction label, e's decoder, and the error
// the tests expect from a decode pass (none).
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir, t = "Decode", e.NewDecoder()
	return dir, t, nil
}
// enc returns the "Encode" direction label, e's encoder, and the error
// the tests expect when the source contains unencodable runes.
func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
	dir, t = "Encode", e.NewEncoder()
	return dir, t, internal.ErrASCIIReplacement
}
// TestNonRepertoire checks Big5 handling of input outside its repertoire:
// the encoder stops at the first unencodable rune, and the decoder
// substitutes U+FFFD and continues. Each case is replayed with every
// possible split point of the source to exercise partial input across
// Transform calls.
func TestNonRepertoire(t *testing.T) {
	testCases := []struct {
		init      func(e encoding.Encoding) (string, transform.Transformer, error)
		e         encoding.Encoding
		src, want string
	}{
		{dec, Big5, "\x80", "\ufffd"},
		{dec, Big5, "\x81", "\ufffd"},
		{dec, Big5, "\x81\x30", "\ufffd\x30"},
		{dec, Big5, "\x81\x40", "\ufffd"},
		{dec, Big5, "\x81\xa0", "\ufffd"},
		{dec, Big5, "\xff", "\ufffd"},

		{enc, Big5, "갂", ""},
		{enc, Big5, "a갂", "a"},
		{enc, Big5, "\u43f0갂", "\x87@"},
	}
	for _, tc := range testCases {
		dir, tr, wantErr := tc.init(tc.e)
		t.Run(fmt.Sprintf("%s/%v/%q", dir, tc.e, tc.src), func(t *testing.T) {

			dst := make([]byte, 100)
			src := []byte(tc.src)
			// Split the source at every position i to exercise resumption.
			for i := 0; i <= len(tc.src); i++ {
				nDst, nSrc, err := tr.Transform(dst, src[:i], false)
				if err != nil && err != transform.ErrShortSrc && err != wantErr {
					t.Fatalf("error on first call to Transform: %v", err)
				}
				n, _, err := tr.Transform(dst[nDst:], src[nSrc:], true)
				nDst += n
				if err != wantErr {
					t.Fatalf("(%q|%q): got %v; want %v", tc.src[:i], tc.src[i:], err, wantErr)
				}
				if got := string(dst[:nDst]); got != tc.want {
					t.Errorf("(%q|%q):\ngot %q\nwant %q", tc.src[:i], tc.src[i:], got, tc.want)
				}
			}
		})
	}
}
// TestBasics runs the shared round-trip encoding test for Big5 against
// known-good encoded/UTF-8 pairs.
func TestBasics(t *testing.T) {
	// The encoded forms can be verified by the iconv program:
	// $ echo 月日は百代 | iconv -f UTF-8 -t SHIFT-JIS | xxd
	testCases := []struct {
		e         encoding.Encoding // encoding under test
		encPrefix string            // unused here; kept for symmetry with other packages
		encSuffix string            // unused here; kept for symmetry with other packages
		encoded   string
		utf8      string
	}{{
		e:       Big5,
		encoded: "A\x87\x40\x87\x41\x87\x45\xa1\x40\xfe\xfd\xfe\xfeZ\xa3\xe1",
		utf8:    "A\u43f0\u4c32\U00027267\u3000\U0002910d\u79d4Z€",
	}, {
		e: Big5,
		encoded: "\xaa\xe1\xb6\xa1\xa4\x40\xb3\xfd\xb0\x73\xa1\x41\xbf\x57\xb0\x75" +
			"\xb5\x4c\xac\xdb\xbf\xcb\xa1\x43",
		utf8: "花間一壺酒,獨酌無相親。",
	}}
	for _, tc := range testCases {
		enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
	}
}
// TestFiles runs the shared file-based round-trip test for Big5.
func TestFiles(t *testing.T) {
	enctest.TestFile(t, Big5)
}
// BenchmarkEncoding runs the shared encoding benchmark for Big5.
func BenchmarkEncoding(b *testing.B) {
	enctest.Benchmark(b, Big5)
}
// TestBig5CircumflexAndMacron tests the special cases listed in
// http://encoding.spec.whatwg.org/#big5
// Note that these special cases aren't preserved by round-tripping through
// decoding and encoding (since
// http://encoding.spec.whatwg.org/index-big5.txt does not have an entry for
// U+0304 or U+030C), so we can't test this in TestBasics.
func TestBig5CircumflexAndMacron(t *testing.T) {
	// Big5 bytes whose decodings include combining circumflex (U+0302),
	// macron (U+0304) and caron (U+030C) sequences.
	src := "\x88\x5f\x88\x60\x88\x61\x88\x62\x88\x63\x88\x64\x88\x65\x88\x66 " +
		"\x88\xa2\x88\xa3\x88\xa4\x88\xa5\x88\xa6"
	want := "ÓǑÒ\u00ca\u0304Ế\u00ca\u030cỀÊ " +
		"ü\u00ea\u0304ế\u00ea\u030cề"
	dst, err := ioutil.ReadAll(transform.NewReader(
		strings.NewReader(src), Big5.NewDecoder()))
	if err != nil {
		t.Fatal(err)
	}
	if got := string(dst); got != want {
		t.Fatalf("\ngot %q\nwant %q", got, want)
	}
}

View file

@ -1,499 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package unicode
import (
"testing"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/internal/enctest"
"golang.org/x/text/transform"
)
// TestBasics runs the shared round-trip encoding test for UTF-16 in both
// byte orders, with and without an expected BOM prefix.
func TestBasics(t *testing.T) {
	testCases := []struct {
		e         encoding.Encoding // encoding under test
		encPrefix string            // BOM bytes expected before the payload, if any
		encSuffix string            // unused; kept for symmetry with other packages
		encoded   string
		utf8      string
	}{{
		e:       utf16BEIB,
		encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65",
		utf8:    "\x57\u00e4\U0001d565",
	}, {
		e:         utf16BEEB,
		encPrefix: "\xfe\xff",
		encoded:   "\x00\x57\x00\xe4\xd8\x35\xdd\x65",
		utf8:      "\x57\u00e4\U0001d565",
	}, {
		e:       utf16LEIB,
		encoded: "\x57\x00\xe4\x00\x35\xd8\x65\xdd",
		utf8:    "\x57\u00e4\U0001d565",
	}, {
		e:         utf16LEEB,
		encPrefix: "\xff\xfe",
		encoded:   "\x57\x00\xe4\x00\x35\xd8\x65\xdd",
		utf8:      "\x57\u00e4\U0001d565",
	}}
	for _, tc := range testCases {
		enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, tc.encSuffix)
	}
}
func TestFiles(t *testing.T) {
enctest.TestFile(t, UTF8)
enctest.TestFile(t, utf16LEIB)
}
func BenchmarkEncoding(b *testing.B) {
enctest.Benchmark(b, UTF8)
enctest.Benchmark(b, utf16LEIB)
}
// Shorthand UTF-16 variants used throughout the tests: both byte orders
// crossed with the three BOM policies (IgnoreBOM, UseBOM, ExpectBOM).
var (
	utf16LEIB = UTF16(LittleEndian, IgnoreBOM) // UTF-16LE (atypical interpretation)
	utf16LEUB = UTF16(LittleEndian, UseBOM)    // UTF-16, LE
	utf16LEEB = UTF16(LittleEndian, ExpectBOM) // UTF-16, LE, Expect
	utf16BEIB = UTF16(BigEndian, IgnoreBOM)    // UTF-16BE (atypical interpretation)
	utf16BEUB = UTF16(BigEndian, UseBOM)       // UTF-16 default
	utf16BEEB = UTF16(BigEndian, ExpectBOM)    // UTF-16 Expect
)
// TestUTF16 exercises the UTF-16 decoder and encoder transformers directly
// via Transform, covering BOM handling (detection, requirement, and
// mid-stream BOM bytes), unpaired surrogates, odd byte counts, and the
// ErrShortSrc/ErrShortDst boundary conditions. Several cases cite the RFC
// 2781 section they verify.
func TestUTF16(t *testing.T) {
	testCases := []struct {
		desc    string
		src     string
		notEOF  bool // the inverse of atEOF
		sizeDst int  // size of destination buffer passed to Transform
		want    string
		nSrc    int // expected number of source bytes consumed
		err     error
		t       transform.Transformer
	}{{
		desc: "utf-16 IgnoreBOM dec: empty string",
		t:    utf16BEIB.NewDecoder(),
	}, {
		desc: "utf-16 UseBOM dec: empty string",
		t:    utf16BEUB.NewDecoder(),
	}, {
		desc: "utf-16 ExpectBOM dec: empty string",
		err:  ErrMissingBOM,
		t:    utf16BEEB.NewDecoder(),
	}, {
		desc:    "utf-16 dec: BOM determines encoding BE (RFC 2781:3.3)",
		src:     "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
		sizeDst: 100,
		want:    "\U00012345=Ra",
		nSrc:    12,
		t:       utf16BEUB.NewDecoder(),
	}, {
		desc:    "utf-16 dec: BOM determines encoding LE (RFC 2781:3.3)",
		src:     "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
		sizeDst: 100,
		want:    "\U00012345=Ra",
		nSrc:    12,
		t:       utf16LEUB.NewDecoder(),
	}, {
		desc:    "utf-16 dec: BOM determines encoding LE, change default (RFC 2781:3.3)",
		src:     "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
		sizeDst: 100,
		want:    "\U00012345=Ra",
		nSrc:    12,
		t:       utf16BEUB.NewDecoder(),
	}, {
		desc:    "utf-16 dec: Fail on missing BOM when required",
		src:     "\x08\xD8\x45\xDF\x3D\x00\xFF\xFE\xFE\xFF\x00\x52\x00\x61",
		sizeDst: 100,
		want:    "",
		nSrc:    0,
		err:     ErrMissingBOM,
		t:       utf16BEEB.NewDecoder(),
	}, {
		desc:    "utf-16 dec: SHOULD interpret text as big-endian when BOM not present (RFC 2781:4.3)",
		src:     "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
		sizeDst: 100,
		want:    "\U00012345=Ra",
		nSrc:    10,
		t:       utf16BEUB.NewDecoder(),
	}, {
		// This is an error according to RFC 2781. But errors in RFC 2781 are
		// open to interpretations, so I guess this is fine.
		desc:    "utf-16le dec: incorrect BOM is an error (RFC 2781:4.1)",
		src:     "\xFE\xFF\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
		sizeDst: 100,
		want:    "\uFFFE\U00012345=Ra",
		nSrc:    12,
		t:       utf16LEIB.NewDecoder(),
	}, {
		desc:    "utf-16 enc: SHOULD write BOM (RFC 2781:3.3)",
		src:     "\U00012345=Ra",
		sizeDst: 100,
		want:    "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
		nSrc:    7,
		t:       utf16LEUB.NewEncoder(),
	}, {
		desc:    "utf-16 enc: SHOULD write BOM (RFC 2781:3.3)",
		src:     "\U00012345=Ra",
		sizeDst: 100,
		want:    "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
		nSrc:    7,
		t:       utf16BEUB.NewEncoder(),
	}, {
		desc:    "utf-16le enc: MUST NOT write BOM (RFC 2781:3.3)",
		src:     "\U00012345=Ra",
		sizeDst: 100,
		want:    "\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
		nSrc:    7,
		t:       utf16LEIB.NewEncoder(),
	}, {
		desc:    "utf-16be dec: incorrect UTF-16: odd bytes",
		src:     "\x00",
		sizeDst: 100,
		want:    "\uFFFD",
		nSrc:    1,
		t:       utf16BEIB.NewDecoder(),
	}, {
		desc:    "utf-16be dec: unpaired surrogate, odd bytes",
		src:     "\xD8\x45\x00",
		sizeDst: 100,
		want:    "\uFFFD\uFFFD",
		nSrc:    3,
		t:       utf16BEIB.NewDecoder(),
	}, {
		desc:    "utf-16be dec: unpaired low surrogate + valid text",
		src:     "\xD8\x45\x00a",
		sizeDst: 100,
		want:    "\uFFFDa",
		nSrc:    4,
		t:       utf16BEIB.NewDecoder(),
	}, {
		desc:    "utf-16be dec: unpaired low surrogate + valid text + single byte",
		src:     "\xD8\x45\x00ab",
		sizeDst: 100,
		want:    "\uFFFDa\uFFFD",
		nSrc:    5,
		t:       utf16BEIB.NewDecoder(),
	}, {
		desc:    "utf-16le dec: unpaired high surrogate",
		src:     "\x00\x00\x00\xDC\x12\xD8",
		sizeDst: 100,
		want:    "\x00\uFFFD\uFFFD",
		nSrc:    6,
		t:       utf16LEIB.NewDecoder(),
	}, {
		desc:    "utf-16be dec: two unpaired low surrogates",
		src:     "\xD8\x45\xD8\x12",
		sizeDst: 100,
		want:    "\uFFFD\uFFFD",
		nSrc:    4,
		t:       utf16BEIB.NewDecoder(),
	}, {
		desc:    "utf-16be dec: short dst",
		src:     "\x00a",
		sizeDst: 0,
		want:    "",
		nSrc:    0,
		t:       utf16BEIB.NewDecoder(),
		err:     transform.ErrShortDst,
	}, {
		desc:    "utf-16be dec: short dst surrogate",
		src:     "\xD8\xF5\xDC\x12",
		sizeDst: 3,
		want:    "",
		nSrc:    0,
		t:       utf16BEIB.NewDecoder(),
		err:     transform.ErrShortDst,
	}, {
		desc:    "utf-16be dec: short dst trailing byte",
		src:     "\x00",
		sizeDst: 2,
		want:    "",
		nSrc:    0,
		t:       utf16BEIB.NewDecoder(),
		err:     transform.ErrShortDst,
	}, {
		desc:    "utf-16be dec: short src",
		src:     "\x00",
		notEOF:  true,
		sizeDst: 3,
		want:    "",
		nSrc:    0,
		t:       utf16BEIB.NewDecoder(),
		err:     transform.ErrShortSrc,
	}, {
		desc:    "utf-16 enc",
		src:     "\U00012345=Ra",
		sizeDst: 100,
		want:    "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
		nSrc:    7,
		t:       utf16BEUB.NewEncoder(),
	}, {
		desc:    "utf-16 enc: short dst normal",
		src:     "\U00012345=Ra",
		sizeDst: 9,
		want:    "\xD8\x08\xDF\x45\x00\x3D\x00\x52",
		nSrc:    6,
		t:       utf16BEIB.NewEncoder(),
		err:     transform.ErrShortDst,
	}, {
		desc:    "utf-16 enc: short dst surrogate",
		src:     "\U00012345=Ra",
		sizeDst: 3,
		want:    "",
		nSrc:    0,
		t:       utf16BEIB.NewEncoder(),
		err:     transform.ErrShortDst,
	}, {
		desc:    "utf-16 enc: short src",
		src:     "\U00012345=Ra\xC2",
		notEOF:  true,
		sizeDst: 100,
		want:    "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
		nSrc:    7,
		t:       utf16BEIB.NewEncoder(),
		err:     transform.ErrShortSrc,
	}, {
		desc:    "utf-16be dec: don't change byte order mid-stream",
		src:     "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\xFF\xFE\x00\x52\x00\x61",
		sizeDst: 100,
		want:    "\U00012345=\ufffeRa",
		nSrc:    14,
		t:       utf16BEUB.NewDecoder(),
	}, {
		desc:    "utf-16le dec: don't change byte order mid-stream",
		src:     "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\xFF\xFE\xFE\xFF\x52\x00\x61\x00",
		sizeDst: 100,
		want:    "\U00012345=\ufeff\ufffeRa",
		nSrc:    16,
		t:       utf16LEUB.NewDecoder(),
	}}
	for i, tc := range testCases {
		b := make([]byte, tc.sizeDst)
		nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF)
		if err != tc.err {
			t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
		}
		if got := string(b[:nDst]); got != tc.want {
			t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
		}
		if nSrc != tc.nSrc {
			t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
		}
	}
}
// TestUTF8Decoder exercises the validating UTF-8 decoder via Transform,
// checking that ill-formed sequences are replaced with U+FFFD, that
// incomplete sequences at a non-EOF boundary yield ErrShortSrc, and that
// tight destination buffers yield ErrShortDst with the expected partial
// output and consumed-byte counts.
func TestUTF8Decoder(t *testing.T) {
	testCases := []struct {
		desc    string
		src     string
		notEOF  bool // the inverse of atEOF
		sizeDst int  // size of destination buffer passed to Transform
		want    string
		nSrc    int // expected number of source bytes consumed
		err     error
	}{{
		desc: "empty string, empty dest buffer",
	}, {
		desc:    "empty string",
		sizeDst: 8,
	}, {
		desc:    "empty string, streaming",
		notEOF:  true,
		sizeDst: 8,
	}, {
		desc:    "ascii",
		src:     "abcde",
		sizeDst: 8,
		want:    "abcde",
		nSrc:    5,
	}, {
		desc:    "ascii and error",
		src:     "ab\x80de",
		sizeDst: 7,
		want:    "ab\ufffdde",
		nSrc:    5,
	}, {
		desc:    "valid two-byte sequence",
		src:     "a\u0300bc",
		sizeDst: 7,
		want:    "a\u0300bc",
		nSrc:    5,
	}, {
		desc:    "valid three-byte sequence",
		src:     "a\u0300中",
		sizeDst: 7,
		want:    "a\u0300中",
		nSrc:    6,
	}, {
		desc:    "valid four-byte sequence",
		src:     "a中\U00016F50",
		sizeDst: 8,
		want:    "a中\U00016F50",
		nSrc:    8,
	}, {
		desc:    "short source buffer",
		src:     "abc\xf0\x90",
		notEOF:  true,
		sizeDst: 10,
		want:    "abc",
		nSrc:    3,
		err:     transform.ErrShortSrc,
	}, {
		// We don't check for the maximal subpart of an ill-formed subsequence
		// at the end of an open segment.
		desc:    "complete invalid that looks like short at end",
		src:     "abc\xf0\x80",
		notEOF:  true,
		sizeDst: 10,
		want:    "abc", // instead of "abc\ufffd\ufffd",
		nSrc:    3,
		err:     transform.ErrShortSrc,
	}, {
		desc:    "incomplete sequence at end",
		src:     "a\x80bc\xf0\x90",
		sizeDst: 9,
		want:    "a\ufffdbc\ufffd",
		nSrc:    6,
	}, {
		desc:    "invalid second byte",
		src:     "abc\xf0dddd",
		sizeDst: 10,
		want:    "abc\ufffddddd",
		nSrc:    8,
	}, {
		desc:    "invalid second byte at end",
		src:     "abc\xf0d",
		sizeDst: 10,
		want:    "abc\ufffdd",
		nSrc:    5,
	}, {
		desc:    "invalid third byte",
		src:     "a\u0300bc\xf0\x90dddd",
		sizeDst: 12,
		want:    "a\u0300bc\ufffddddd",
		nSrc:    11,
	}, {
		desc:    "invalid third byte at end",
		src:     "a\u0300bc\xf0\x90d",
		sizeDst: 12,
		want:    "a\u0300bc\ufffdd",
		nSrc:    8,
	}, {
		desc:    "invalid fourth byte, tight buffer",
		src:     "a\u0300bc\xf0\x90\x80d",
		sizeDst: 9,
		want:    "a\u0300bc\ufffdd",
		nSrc:    9,
	}, {
		desc:    "invalid fourth byte at end",
		src:     "a\u0300bc\xf0\x90\x80",
		sizeDst: 8,
		want:    "a\u0300bc\ufffd",
		nSrc:    8,
	}, {
		desc:    "invalid fourth byte and short four byte sequence",
		src:     "a\u0300bc\xf0\x90\x80\xf0\x90\x80",
		notEOF:  true,
		sizeDst: 20,
		want:    "a\u0300bc\ufffd",
		nSrc:    8,
		err:     transform.ErrShortSrc,
	}, {
		desc:    "valid four-byte sequence overflowing short buffer",
		src:     "a\u0300bc\xf0\x90\x80\x80",
		notEOF:  true,
		sizeDst: 8,
		want:    "a\u0300bc",
		nSrc:    5,
		err:     transform.ErrShortDst,
	}, {
		desc:    "invalid fourth byte at end short, but short dst",
		src:     "a\u0300bc\xf0\x90\x80\xf0\x90\x80",
		notEOF:  true,
		sizeDst: 8,
		// More bytes would fit in the buffer, but this seems to require a more
		// complicated and slower algorithm.
		want: "a\u0300bc", // instead of "a\u0300bc"
		nSrc: 5,
		err:  transform.ErrShortDst,
	}, {
		desc:    "short dst for error",
		src:     "abc\x80",
		notEOF:  true,
		sizeDst: 5,
		want:    "abc",
		nSrc:    3,
		err:     transform.ErrShortDst,
	}, {
		desc:    "adjusting short dst buffer",
		src:     "abc\x80ef",
		notEOF:  true,
		sizeDst: 6,
		want:    "abc\ufffd",
		nSrc:    4,
		err:     transform.ErrShortDst,
	}}
	tr := UTF8.NewDecoder()
	for i, tc := range testCases {
		b := make([]byte, tc.sizeDst)
		nDst, nSrc, err := tr.Transform(b, []byte(tc.src), !tc.notEOF)
		if err != tc.err {
			t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
		}
		if got := string(b[:nDst]); got != tc.want {
			t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
		}
		if nSrc != tc.nSrc {
			t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
		}
	}
}
// TestBOMOverride verifies that BOMOverride switches from the fallback
// decoder (CodePage437 here) to UTF-8, UTF-16BE, or UTF-16LE when the input
// starts with the corresponding BOM, consumes the BOM itself, and returns
// ErrShortSrc when a potential BOM prefix is truncated before EOF.
func TestBOMOverride(t *testing.T) {
	dec := BOMOverride(charmap.CodePage437.NewDecoder())
	dst := make([]byte, 100)
	for i, tc := range []struct {
		src   string
		atEOF bool
		dst   string
		nSrc  int
		err   error
	}{
		0:  {"H\x82ll\x93", true, "Héllô", 5, nil},
		1:  {"\uFEFFHéllö", true, "Héllö", 10, nil},
		2:  {"\xFE\xFF\x00H\x00e\x00l\x00l\x00o", true, "Hello", 12, nil},
		3:  {"\xFF\xFEH\x00e\x00l\x00l\x00o\x00", true, "Hello", 12, nil},
		4:  {"\uFEFF", true, "", 3, nil},
		5:  {"\xFE\xFF", true, "", 2, nil},
		6:  {"\xFF\xFE", true, "", 2, nil},
		7:  {"\xEF\xBB", true, "\u2229\u2557", 2, nil}, // partial UTF-8 BOM at EOF: fall back
		8:  {"\xEF", true, "\u2229", 1, nil},
		9:  {"", true, "", 0, nil},
		10: {"\xFE", true, "\u25a0", 1, nil},
		11: {"\xFF", true, "\u00a0", 1, nil},
		// Same prefixes before EOF: must wait for more input.
		12: {"\xEF\xBB", false, "", 0, transform.ErrShortSrc},
		13: {"\xEF", false, "", 0, transform.ErrShortSrc},
		14: {"", false, "", 0, transform.ErrShortSrc},
		15: {"\xFE", false, "", 0, transform.ErrShortSrc},
		16: {"\xFF", false, "", 0, transform.ErrShortSrc},
		17: {"\xFF\xFE", false, "", 0, transform.ErrShortSrc},
	} {
		dec.Reset()
		nDst, nSrc, err := dec.Transform(dst, []byte(tc.src), tc.atEOF)
		got := string(dst[:nDst])
		if nSrc != tc.nSrc {
			t.Errorf("%d: nSrc: got %d; want %d", i, nSrc, tc.nSrc)
		}
		if got != tc.dst {
			t.Errorf("%d: got %+q; want %+q", i, got, tc.dst)
		}
		if err != tc.err {
			t.Errorf("%d: error: got %v; want %v", i, err, tc.err)
		}
	}
}

9
vendor/golang.org/x/text/gen.go generated vendored
View file

@ -117,14 +117,15 @@ pkg unicode, var <new script or property> *RangeTable
width = generate("./width", unicode)
bidi = generate("./unicode/bidi", unicode, norm, rangetable)
mib = generate("./encoding/internal/identifier", unicode)
number = generate("./internal/number", unicode, cldr, language, internal)
_ = generate("./encoding/htmlindex", unicode, language, mib)
_ = generate("./encoding/ianaindex", unicode, language, mib)
_ = generate("./secure/precis", unicode, norm, rangetable, cases, width, bidi)
_ = generate("./currency", unicode, cldr, language, internal)
_ = generate("./internal/number", unicode, cldr, language, internal)
_ = generate("./feature/plural", unicode, cldr, language, internal)
_ = generate("./internal/cldrtree", language)
_ = generate("./currency", unicode, cldr, language, internal, number)
_ = generate("./feature/plural", unicode, cldr, language, internal, number)
_ = generate("./internal/export/idna", unicode, bidi, norm)
_ = generate("./language/display", unicode, cldr, language, internal)
_ = generate("./language/display", unicode, cldr, language, internal, number)
_ = generate("./collate", unicode, norm, cldr, language, rangetable)
_ = generate("./search", unicode, norm, cldr, language, rangetable)
)

View file

@ -1,121 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab_test
// This file contains tests which need to import package collate, which causes
// an import cycle when done within package colltab itself.
import (
"bytes"
"testing"
"unicode"
"golang.org/x/text/collate"
"golang.org/x/text/language"
"golang.org/x/text/unicode/rangetable"
)
// assigned is used to only test runes that are inside the scope of the
// Unicode version used to generate the collation table.
var assigned = rangetable.Assigned(collate.UnicodeVersion)
// TestNonDigits verifies that every assigned numeric rune that is not a
// decimal digit (category N but not Nd) collates strictly outside the
// [0, 999999] digit range under numeric collation.
func TestNonDigits(t *testing.T) {
	col := collate.New(language.English, collate.Loose, collate.Numeric)

	lo := rune(unicode.N.R16[0].Lo)
	hi := rune(unicode.N.R32[0].Hi)
	for r := lo; r <= hi; r++ {
		if unicode.In(r, unicode.Nd) {
			continue // decimal digits are supposed to sort as numbers
		}
		if !unicode.In(r, assigned) {
			continue // skip runes beyond the table's Unicode version
		}
		s := string(r)
		if col.CompareString(s, "0") == -1 || col.CompareString(s, "999999") == 1 {
			continue // sorts outside the digit range, as required
		}
		t.Errorf("%+q non-digit number is collated as digit", s)
	}
}
// TestNumericCompare runs testDigitCompare over every contiguous decimal
// digit range (category Nd) in both the 16-bit and 32-bit range tables.
func TestNumericCompare(t *testing.T) {
	col := collate.New(language.English, collate.Loose, collate.Numeric)
	for _, rng := range unicode.Nd.R16 {
		testDigitCompare(t, col, rune(rng.Lo), rune(rng.Hi))
	}
	for _, rng := range unicode.Nd.R32 {
		testDigitCompare(t, col, rune(rng.Lo), rune(rng.Hi))
	}
}
// testDigitCompare checks that the digits in [zero, nine] compare numerically
// under c: for several prefixes (including leading zeros) each single digit
// is compared against reference ASCII numbers of equal magnitude.
func testDigitCompare(t *testing.T, c *collate.Collator, zero, nine rune) {
	if !unicode.In(zero, assigned) {
		// Skip digit blocks outside the table's Unicode version.
		return
	}
	n := int(nine - zero + 1)
	if n%10 != 0 {
		t.Fatalf("len([%+q, %+q]) = %d; want a multiple of 10", zero, nine, n)
	}
	for _, tt := range []struct {
		prefix string     // prepended to the digit under test
		b      [11]string // ASCII references; b[j] equals prefix+digit j, b[10] is one larger
	}{
		{
			prefix: "",
			b: [11]string{
				"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
			},
		},
		{
			prefix: "1",
			b: [11]string{
				"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
			},
		},
		{
			prefix: "0",
			b: [11]string{
				"00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
			},
		},
		{
			prefix: "00",
			b: [11]string{
				"000", "001", "002", "003", "004", "005", "006", "007", "008", "009", "010",
			},
		},
		{
			prefix: "9",
			b: [11]string{
				"90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "100",
			},
		},
	} {
		for k := 0; k <= n; k++ {
			i := k % 10
			a := tt.prefix + string(zero+rune(i))
			for j, b := range tt.b {
				// a represents the number prefix+i; it must compare equal to
				// b[i], below larger references, and above smaller ones.
				want := 0
				switch {
				case i < j:
					want = -1
				case i > j:
					want = 1
				}
				got := c.CompareString(a, b)
				if got != want {
					t.Errorf("Compare(%+q, %+q) = %d; want %d", a, b, got, want)
					return
				}
			}
		}
	}
}
// BenchmarkNumericWeighter measures numeric collation comparing a repeated
// mixed text/number input against itself.
func BenchmarkNumericWeighter(b *testing.B) {
	col := collate.New(language.English, collate.Numeric)
	text := bytes.Repeat([]byte("Testing, testing 123..."), 100)
	// Both arguments are consumed per comparison.
	b.SetBytes(int64(len(text) + len(text)))
	for n := 0; n < b.N; n++ {
		col.Compare(text, text)
	}
}

View file

@ -1,183 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"fmt"
"testing"
"unicode"
)
// String formats e's four collation weights as "[p, s, t, q]", printing the
// quaternary weight as "max" when it equals MaxQuaternary.
func (e Elem) String() string {
	q := "max"
	if v := e.Quaternary(); v != MaxQuaternary {
		q = fmt.Sprint(v)
	}
	return fmt.Sprintf("[%d, %d, %d, %s]", e.Primary(), e.Secondary(), e.Tertiary(), q)
}
// ceTest pairs a constructor function with its input weights. The function
// builds an Elem from arg, writes the decoded weights back into its slice
// argument, and reports the expected element type.
type ceTest struct {
	f   func(inout []int) (Elem, ceType)
	arg []int
}
// makeCE builds an Elem from [primary, secondary, tertiary, ccc] weights,
// discarding any construction error.
func makeCE(weights []int) Elem {
	elem, _ := MakeElem(weights[0], weights[1], weights[2], uint8(weights[3]))
	return elem
}
// defaultValues supplies fallback weights for positions not given to e.
var defaultValues = []int{0, defaultSecondary, defaultTertiary, 0}

// e builds an Elem from up to four weights, padding missing trailing
// positions with the defaults above.
func e(w ...int) Elem {
	return makeCE(append(w, defaultValues[len(w):]...))
}
// makeContractIndex packs a contraction-trie reference into an Elem: the
// contraction tag in the top bits, then offset, index, and n in successively
// lower bit fields.
func makeContractIndex(index, n, offset int) Elem {
	const (
		contractID            = 0xC0000000
		maxNBits              = 4
		maxTrieIndexBits      = 12
		maxContractOffsetBits = 13
	)
	return Elem(contractID) +
		Elem(offset<<(maxNBits+maxTrieIndexBits)) +
		Elem(index<<maxNBits) +
		Elem(n)
}
// makeExpandIndex packs an expansion-table index into an Elem under the
// expansion tag.
func makeExpandIndex(index int) Elem {
	const expandID = 0xE0000000
	return Elem(index) + expandID
}
// makeDecompose packs two tertiary weights into an Elem under the
// decomposition tag: t1 in the low byte, t2 in the next.
func makeDecompose(t1, t2 int) Elem {
	const decompID = 0xF0000000
	return decompID + Elem(t1+t2<<8)
}
// normalCE builds a normal collation element from inout and writes the
// decoded primary, secondary, tertiary, and CCC values back into inout so the
// caller can compare them with the originals.
func normalCE(inout []int) (ce Elem, t ceType) {
	ce = makeCE(inout)
	inout[0] = ce.Primary()
	inout[1] = ce.Secondary()
	inout[2] = int(ce.Tertiary())
	inout[3] = int(ce.CCC())
	return ce, ceNormal
}
// expandCE builds an expansion-index element from inout[0] and writes the
// decoded index back for round-trip comparison.
func expandCE(inout []int) (ce Elem, t ceType) {
	ce = makeExpandIndex(inout[0])
	inout[0] = splitExpandIndex(ce)
	return ce, ceExpansionIndex
}
// contractCE builds a contraction-index element from inout's (index, n,
// offset) and writes the decoded fields back for round-trip comparison.
func contractCE(inout []int) (ce Elem, t ceType) {
	ce = makeContractIndex(inout[0], inout[1], inout[2])
	i, n, o := splitContractIndex(ce)
	inout[0], inout[1], inout[2] = i, n, o
	return ce, ceContractionIndex
}
// decompCE builds a decomposition element from inout's two tertiary weights
// and writes the decoded weights back for round-trip comparison.
func decompCE(inout []int) (ce Elem, t ceType) {
	ce = makeDecompose(inout[0], inout[1])
	t1, t2 := splitDecompose(ce)
	inout[0], inout[1] = int(t1), int(t2)
	return ce, ceDecompose
}
// ceTests covers each element kind at zero, typical, and maximum field
// values, including the bit-field limits of the contraction encoding.
var ceTests = []ceTest{
	{normalCE, []int{0, 0, 0, 0}},
	{normalCE, []int{0, 30, 3, 0}},
	{normalCE, []int{0, 30, 3, 0xFF}},
	{normalCE, []int{100, defaultSecondary, defaultTertiary, 0}},
	{normalCE, []int{100, defaultSecondary, defaultTertiary, 0xFF}},
	{normalCE, []int{100, defaultSecondary, 3, 0}},
	{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}},

	{contractCE, []int{0, 0, 0}},
	{contractCE, []int{1, 1, 1}},
	{contractCE, []int{1, (1 << maxNBits) - 1, 1}},
	{contractCE, []int{(1 << maxTrieIndexBits) - 1, 1, 1}},
	{contractCE, []int{1, 1, (1 << maxContractOffsetBits) - 1}},

	{expandCE, []int{0}},
	{expandCE, []int{5}},
	{expandCE, []int{(1 << maxExpandIndexBits) - 1}},

	{decompCE, []int{0, 0}},
	{decompCE, []int{1, 1}},
	{decompCE, []int{0x1F, 0x1F}},
}
// TestColElem round-trips every ceTests entry: the constructed element must
// report the expected type and decode back to the original weights.
func TestColElem(t *testing.T) {
	for i, tt := range ceTests {
		args := append([]int(nil), tt.arg...)
		ce, wantType := tt.f(args)
		if got := ce.ctype(); got != wantType {
			t.Errorf("%d: type is %d; want %d (ColElem: %X)", i, got, wantType, ce)
		}
		for j, want := range tt.arg {
			if args[j] != want {
				t.Errorf("%d: argument %d is %X; want %X (ColElem: %X)", i, j, args[j], want, ce)
			}
		}
	}
}
// implicitTest pairs a rune with its expected implicit primary weight.
type implicitTest struct {
	r rune
	p int
}

// implicitTests covers the boundaries of the CJK-related ranges that receive
// implicit primary weights, plus the maximum rune.
var implicitTests = []implicitTest{
	{0x33FF, 0x533FF},
	{0x3400, 0x23400},
	{0x4DC0, 0x54DC0},
	{0x4DFF, 0x54DFF},
	{0x4E00, 0x14E00},
	{0x9FCB, 0x19FCB},
	{0xA000, 0x5A000},
	{0xF8FF, 0x5F8FF},
	{0xF900, 0x1F900},
	{0xFA23, 0x1FA23},
	{0xFAD9, 0x1FAD9},
	{0xFB00, 0x5FB00},
	{0x20000, 0x40000},
	{0x2B81C, 0x4B81C},
	{unicode.MaxRune, 0x15FFFF}, // maximum primary value
}
// TestImplicit checks implicitPrimary against the expected weights in
// implicitTests.
func TestImplicit(t *testing.T) {
	for _, tt := range implicitTests {
		got := implicitPrimary(tt.r)
		if got == tt.p {
			continue
		}
		t.Errorf("%U: was %X; want %X", tt.r, got, tt.p)
	}
}
// TestUpdateTertiary checks that updateTertiary rewrites an element's
// tertiary weight, covering both the compact and the expanded encodings.
func TestUpdateTertiary(t *testing.T) {
	tests := []struct {
		in, out Elem
		t       uint8 // new tertiary weight to apply
	}{
		{0x4000FE20, 0x0000FE8A, 0x0A},
		{0x4000FE21, 0x0000FEAA, 0x0A},
		{0x0000FE8B, 0x0000FE83, 0x03},
		{0x82FF0188, 0x9BFF0188, 0x1B},
		{0xAFF0CC02, 0xAFF0CC1B, 0x1B},
	}
	for i, tt := range tests {
		if out := tt.in.updateTertiary(tt.t); out != tt.out {
			t.Errorf("%d: was %X; want %X", i, out, tt.out)
		}
	}
}

View file

@ -1,64 +0,0 @@
package colltab
import (
"testing"
"golang.org/x/text/language"
)
// TestMatchLang checks MatchLang's selection of the best-matching collation
// tag from a fixed list, including extension-variant matching, script
// inference, and the rule that near-but-distinct languages must not match.
func TestMatchLang(t *testing.T) {
	// Candidate tags; index 0 (Und) is also the fallback when nothing matches.
	tags := []language.Tag{
		0:  language.Und,
		1:  language.MustParse("bs"),
		2:  language.German,
		3:  language.English,
		4:  language.AmericanEnglish,
		5:  language.MustParse("en-US-u-va-posix"),
		6:  language.Portuguese,
		7:  language.Serbian,
		8:  language.MustParse("sr-Latn"),
		9:  language.Chinese,
		10: language.MustParse("zh-u-co-stroke"),
		11: language.MustParse("zh-Hant-u-co-pinyin"),
		12: language.TraditionalChinese,
	}
	for i, tc := range []struct {
		x int // expected index into tags
		t language.Tag
	}{
		{0, language.Und},
		{0, language.Persian}, // Default to first element when no match.
		{3, language.English},
		{4, language.AmericanEnglish},
		{5, language.MustParse("en-US-u-va-posix")},   // Ext. variant match.
		{4, language.MustParse("en-US-u-va-noposix")}, // Ext. variant mismatch.
		{3, language.MustParse("en-UK-u-va-noposix")}, // Ext. variant mismatch.
		{7, language.Serbian},
		{0, language.Croatian},             // Don't match to close language!
		{0, language.MustParse("gsw")},     // Don't match to close language!
		{1, language.MustParse("bs-Cyrl")}, // Odd, but correct.
		{1, language.MustParse("bs-Latn")}, // Estimated script drops.
		{8, language.MustParse("sr-Latn")},
		{9, language.Chinese},
		{9, language.SimplifiedChinese},
		{12, language.TraditionalChinese},
		{11, language.MustParse("zh-Hant-u-co-pinyin")},
		// TODO: should this be 12? Either inherited value (10) or default is
		// fine in this case, though. Other locales are not affected.
		{10, language.MustParse("zh-Hant-u-co-stroke")},
		// There is no "phonebk" sorting order for zh-Hant, so use default.
		{12, language.MustParse("zh-Hant-u-co-phonebk")},
		{10, language.MustParse("zh-u-co-stroke")},
		{12, language.MustParse("und-TW")},      // Infer script and language.
		{12, language.MustParse("und-HK")},      // Infer script and language.
		{6, language.MustParse("und-BR")},       // Infer script and language.
		{6, language.MustParse("und-PT")},       // Infer script and language.
		{2, language.MustParse("und-Latn-DE")},  // Infer language.
		{0, language.MustParse("und-Jpan-BR")},  // Infers "ja", so no match.
		{0, language.MustParse("zu")},           // No match past index.
	} {
		if x := MatchLang(tc.t, tags); x != tc.x {
			t.Errorf("%d: MatchLang(%q, tags) = %d; want %d", i, tc.t, x, tc.x)
		}
	}
}

View file

@ -1,131 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
// lookupStrings describes one lookup: the input string, the expected match
// offset, and the expected number of input bytes consumed.
type lookupStrings struct {
	str    string
	offset int
	n      int // bytes consumed from input
}

// LookupTest bundles a contraction trie (tries, with n root nodes) with the
// lookups expected to succeed or fail against it.
type LookupTest struct {
	lookup []lookupStrings
	n      int
	tries  ContractTrieSet
}
// lookupTests covers single-entry tries, sibling suffix ranges, prefix
// matches at interior nodes, and multi-root tries with shared prefixes.
var lookupTests = []LookupTest{{
	[]lookupStrings{
		{"abc", 1, 3},
		{"a", 0, 0},
		{"b", 0, 0},
		{"c", 0, 0},
		{"d", 0, 0},
	},
	1,
	ContractTrieSet{
		{'a', 0, 1, 0xFF},
		{'b', 0, 1, 0xFF},
		{'c', 'c', 0, 1},
	},
}, {
	[]lookupStrings{
		{"abc", 1, 3},
		{"abd", 2, 3},
		{"abe", 3, 3},
		{"a", 0, 0},
		{"ab", 0, 0},
		{"d", 0, 0},
		{"f", 0, 0},
	},
	1,
	ContractTrieSet{
		{'a', 0, 1, 0xFF},
		{'b', 0, 1, 0xFF},
		{'c', 'e', 0, 1},
	},
}, {
	[]lookupStrings{
		{"abc", 1, 3},
		{"ab", 2, 2},
		{"a", 3, 1},
		{"abcd", 1, 3},
		{"abe", 2, 2},
	},
	1,
	ContractTrieSet{
		{'a', 0, 1, 3},
		{'b', 0, 1, 2},
		{'c', 'c', 0, 1},
	},
}, {
	[]lookupStrings{
		{"abc", 1, 3},
		{"abd", 2, 3},
		{"ab", 3, 2},
		{"ac", 4, 2},
		{"a", 5, 1},
		{"b", 6, 1},
		{"ba", 6, 1},
	},
	2,
	ContractTrieSet{
		{'b', 'b', 0, 6},
		{'a', 0, 2, 5},
		{'c', 'c', 0, 4},
		{'b', 0, 1, 3},
		{'c', 'd', 0, 1},
	},
}, {
	[]lookupStrings{
		{"bcde", 2, 4},
		{"bc", 7, 2},
		{"ab", 6, 2},
		{"bcd", 5, 3},
		{"abcd", 1, 4},
		{"abc", 4, 3},
		{"bcdf", 3, 4},
	},
	2,
	ContractTrieSet{
		{'b', 3, 1, 0xFF},
		{'a', 0, 1, 0xFF},
		{'b', 0, 1, 6},
		{'c', 0, 1, 4},
		{'d', 'd', 0, 1},
		{'c', 0, 1, 7},
		{'d', 0, 1, 5},
		{'e', 'f', 0, 2},
	},
}}
// lookup scans s through contraction trie c, whose root spans nnode entries,
// and reports the matched offset and the number of bytes consumed.
func lookup(c *ContractTrieSet, nnode int, s []uint8) (i, n int) {
	sc := c.scanner(0, nnode, s)
	sc.scan(0)
	return sc.result()
}
// TestLookupContraction checks that contraction-trie lookups return the
// expected offset and the expected number of consumed bytes, both for each
// exact test string and for the string with a trailing non-matching byte.
func TestLookupContraction(t *testing.T) {
	for i, tt := range lookupTests {
		cts := ContractTrieSet(tt.tries)
		for j, lu := range tt.lookup {
			str := lu.str
			for _, s := range []string{str, str + "X"} {
				const msg = `%d:%d: %s of "%s" %v; want %v`
				offset, n := lookup(&cts, tt.n, []byte(s))
				if offset != lu.offset {
					t.Errorf(msg, i, j, "offset", s, offset, lu.offset)
				}
				if n != lu.n {
					// Report lu.n, the value actually compared against; for
					// partial matches it differs from len(str).
					t.Errorf(msg, i, j, "bytes consumed", s, n, lu.n)
				}
			}
		}
	}
}

View file

@ -1,63 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
// TestDoNorm checks that Iter.doNorm stably sorts a trailing run of collation
// elements by CCC: elements after the insertion point (marked div) must be
// interleaved into CCC order without reordering equal-CCC elements.
func TestDoNorm(t *testing.T) {
	const div = -1 // The insertion point of the next block.
	tests := []struct {
		in, out []int // CCC sequences; `in` contains the div marker
	}{{
		in:  []int{4, div, 3},
		out: []int{3, 4},
	}, {
		in:  []int{4, div, 3, 3, 3},
		out: []int{3, 3, 3, 4},
	}, {
		in:  []int{0, 4, div, 3},
		out: []int{0, 3, 4},
	}, {
		in:  []int{0, 0, 4, 5, div, 3, 3},
		out: []int{0, 0, 3, 3, 4, 5},
	}, {
		in:  []int{0, 0, 1, 4, 5, div, 3, 3},
		out: []int{0, 0, 1, 3, 3, 4, 5},
	}, {
		in:  []int{0, 0, 1, 4, 5, div, 4, 4},
		out: []int{0, 0, 1, 4, 4, 4, 5},
	},
	}
	for j, tt := range tests {
		i := Iter{}
		var w, p int
		for k, cc := range tt.in {
			if cc == div {
				// Elements after the divider get a higher primary weight so
				// that a CCC-boundary crossing is detectable below.
				w = 100
				p = k
				continue
			}
			i.Elems = append(i.Elems, makeCE([]int{w, defaultSecondary, 2, cc}))
		}
		i.doNorm(p, i.Elems[p].CCC())
		if len(i.Elems) != len(tt.out) {
			t.Errorf("%d: length was %d; want %d", j, len(i.Elems), len(tt.out))
		}
		prevCCC := uint8(0)
		for k, ce := range i.Elems {
			if int(ce.CCC()) != tt.out[k] {
				t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.CCC(), tt.out[k])
			}
			if k > 0 && ce.CCC() == prevCCC && i.Elems[k-1].Primary() > ce.Primary() {
				t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k)
			}
		}
	}
	// Combining rune overflow is tested in search/pattern_test.go.
}

View file

@ -1,159 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"reflect"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
)
// Default secondary and tertiary weights used for all digit fixtures below.
const (
	digSec  = defaultSecondary
	digTert = defaultTertiary
)

// tPlus3 is a non-primary element inserted after some digits to verify that
// such elements are carried through numeric weighting.
var tPlus3 = e(0, 50, digTert+3)
// numWeighter is a testWeighter used for testing numericWeighter.
//
// The fullwidth-digit keys (U+FF10..U+FF19) were corrupted to empty strings
// in this copy, which made the map literal contain duplicate keys; they are
// restored here per the adjacent comments.
var numWeighter = testWeighter{
	"0": p(100),
	"０": []Elem{e(100, digSec, digTert+1)}, // U+FF10 FULLWIDTH DIGIT ZERO
	"₀": []Elem{e(100, digSec, digTert+5)}, // U+2080 SUBSCRIPT ZERO

	"1": p(101),
	// Allow non-primary collation elements to be inserted.
	"١": append(p(101), tPlus3), // U+0661 ARABIC-INDIC DIGIT ONE
	// Allow varying tertiary weight if the number is Nd.
	"１": []Elem{e(101, digSec, digTert+1)}, // U+FF11 FULLWIDTH DIGIT ONE

	"2": p(102),
	// Allow non-primary collation elements to be inserted.
	"٢": append(p(102), tPlus3), // U+0662 ARABIC-INDIC DIGIT TWO
	// Varying tertiary weights should be ignored.
	"２": []Elem{e(102, digSec, digTert+3)}, // U+FF12 FULLWIDTH DIGIT TWO

	"3": p(103),
	"4": p(104),
	"5": p(105),
	"6": p(106),
	"7": p(107),

	// Weights must be strictly monotonically increasing, but do not need to be
	// consecutive.
	"8": p(118),

	"9": p(119),
	// Allow non-primary collation elements to be inserted.
	"٩": append(p(119), tPlus3), // U+0669 ARABIC-INDIC DIGIT NINE
	// Varying tertiary weights should be ignored.
	"９": []Elem{e(119, digSec, digTert+1)}, // U+FF19 FULLWIDTH DIGIT NINE
	"₉": []Elem{e(119, digSec, digTert+5)}, // U+2089 SUBSCRIPT NINE

	"a":   p(5),
	"b":   p(6),
	"c":   p(8, 2),
	"klm": p(99),
	"nop": p(121),
	"x":   p(200),
	"y":   p(201),
}
// p builds collation elements with the given primary weights and the default
// digit secondary/tertiary weights. The named nil return is deliberate:
// callers compare results with reflect.DeepEqual.
func p(w ...int) (elems []Elem) {
	for _, x := range w {
		e, _ := MakeElem(x, digSec, digTert, 0)
		elems = append(elems, e)
	}
	return elems
}
// TestNumericAppendNext checks that the numeric weighter prefixes digit runs
// with a run marker and a length element, skips leading zeros, preserves
// interleaved non-primary elements, and appends to an existing buffer.
//
// The fullwidth "１２９" input literal (U+FF11 U+FF12 U+FF19) was corrupted
// to an empty string in this copy and is restored here, matching the
// expected tertiary weights of the fullwidth entries in numWeighter.
func TestNumericAppendNext(t *testing.T) {
	for _, tt := range []struct {
		in string
		w  []Elem
	}{
		{"a", p(5)},
		{"klm", p(99)},
		{"aa", p(5, 5)},
		{"1", p(120, 1, 101)},
		{"0", p(120, 0)},
		{"01", p(120, 1, 101)},
		{"0001", p(120, 1, 101)},
		{"10", p(120, 2, 101, 100)},
		{"99", p(120, 2, 119, 119)},
		{"9999", p(120, 4, 119, 119, 119, 119)},
		{"1a", p(120, 1, 101, 5)},
		{"0b", p(120, 0, 6)},
		{"01c", p(120, 1, 101, 8, 2)},
		{"10x", p(120, 2, 101, 100, 200)},
		{"99y", p(120, 2, 119, 119, 201)},
		{"9999nop", p(120, 4, 119, 119, 119, 119, 121)},

		// Allow follow-up collation elements if they have a zero non-primary.
		{"١٢٩", []Elem{e(120), e(3), e(101), tPlus3, e(102), tPlus3, e(119), tPlus3}},
		{
			"１２９",
			[]Elem{
				e(120), e(3),
				e(101, digSec, digTert+1),
				e(102, digSec, digTert+3),
				e(119, digSec, digTert+1),
			},
		},

		// Ensure AppendNext* adds to the given buffer.
		{"a10", p(5, 120, 2, 101, 100)},
	} {
		nw := NewNumericWeighter(numWeighter)

		b := []byte(tt.in)
		got := []Elem(nil)
		for n, sz := 0, 0; n < len(b); {
			got, sz = nw.AppendNext(got, b[n:])
			n += sz
		}
		if !reflect.DeepEqual(got, tt.w) {
			t.Errorf("AppendNext(%q) =\n%v; want\n%v", tt.in, got, tt.w)
		}

		got = nil
		for n, sz := 0, 0; n < len(tt.in); {
			got, sz = nw.AppendNextString(got, tt.in[n:])
			n += sz
		}
		if !reflect.DeepEqual(got, tt.w) {
			t.Errorf("AppendNextString(%q) =\n%v; want\n%v", tt.in, got, tt.w)
		}
	}
}
// TestNumericOverflow checks that a digit run longer than maxDigits is
// truncated: only maxDigits bytes are consumed and the run-length element
// (got[1]) carries maxDigits as its primary weight.
func TestNumericOverflow(t *testing.T) {
	manyDigits := strings.Repeat("9", maxDigits+1) + "a"

	nw := NewNumericWeighter(numWeighter)

	got, n := nw.AppendNextString(nil, manyDigits)

	if n != maxDigits {
		t.Errorf("n: got %d; want %d", n, maxDigits)
	}

	if p := got[1].Primary(); p != maxDigits {
		// Report the primary weight under test, not n (which was checked
		// above and may hold a different value).
		t.Errorf("primary(e[1]): got %d; want %d", p, maxDigits)
	}
}
// TestNumericWeighterAlloc verifies that wrapping a Weighter in a numeric
// weighter introduces no additional allocations per AppendNextString call.
func TestNumericWeighterAlloc(t *testing.T) {
	buf := make([]Elem, 100)
	w := NewNumericWeighter(numWeighter)
	const s = "1234567890a"

	base := testtext.AllocsPerRun(3, func() { numWeighter.AppendNextString(buf, s) })
	numeric := testtext.AllocsPerRun(3, func() { w.AppendNextString(buf, s) })
	if extra := numeric - base; extra > 0 {
		t.Errorf("got %f; want 0", extra)
	}
}

View file

@ -1,106 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
import (
"testing"
)
// We take the smallest, largest and an arbitrary value for each
// of the UTF-8 sequence lengths. A rune's index in this slice is also its
// expected trie value in TestLookupTrie.
var testRunes = []rune{
	0x01, 0x0C, 0x7F, // 1-byte sequences
	0x80, 0x100, 0x7FF, // 2-byte sequences
	0x800, 0x999, 0xFFFF, // 3-byte sequences
	0x10000, 0x10101, 0x10FFFF, // 4-byte sequences
	0x200, 0x201, 0x202, 0x210, 0x215, // five entries in one sparse block
}
// Test cases for illegal runes: the malformed input bytes and the number of
// bytes a lookup is expected to consume before giving up.
type trietest struct {
	size  int
	bytes []byte
}
// tests enumerates malformed UTF-8 inputs: bad continuation bytes, truncated
// sequences, and sequences longer than utf8.UTFMax.
var tests = []trietest{
	// illegal runes
	{1, []byte{0x80}},
	{1, []byte{0xFF}},
	{1, []byte{t2, tx - 1}},
	{1, []byte{t2, t2}},
	{2, []byte{t3, tx, tx - 1}},
	{2, []byte{t3, tx, t2}},
	{1, []byte{t3, tx - 1, tx}},
	{3, []byte{t4, tx, tx, tx - 1}},
	{3, []byte{t4, tx, tx, t2}},
	{1, []byte{t4, t2, tx, tx - 1}},
	{2, []byte{t4, tx, t2, tx - 1}},

	// short runes
	{0, []byte{t2}},
	{0, []byte{t3, tx}},
	{0, []byte{t4, tx, tx}},

	// we only support UTF-8 up to utf8.UTFMax bytes (4 bytes)
	{1, []byte{t5, tx, tx, tx, tx}},
	{1, []byte{t6, tx, tx, tx, tx, tx}},
}
// TestLookupTrie checks trie lookups for valid runes (value equals the
// rune's index in testRunes, size equals its UTF-8 length) and for the
// malformed inputs in tests (value 0, expected consumed size).
func TestLookupTrie(t *testing.T) {
	for want, r := range testRunes {
		b := []byte(string(r))
		v, sz := testTrie.lookup(b)
		if int(v) != want {
			t.Errorf("lookup(%U): found value %#x, expected %#x", r, v, want)
		}
		if sz != len(b) {
			t.Errorf("lookup(%U): found size %d, expected %d", r, sz, len(b))
		}
	}
	for i, tt := range tests {
		v, sz := testTrie.lookup(tt.bytes)
		if v != 0 {
			t.Errorf("lookup of illegal rune, case %d: found value %#x, expected 0", i, v)
		}
		if sz != tt.size {
			t.Errorf("lookup of illegal rune, case %d: found size %d, expected %d", i, sz, tt.size)
		}
	}
}
// test data is taken from exp/collate/locale/build/trie_test.go

// testValues holds the sparse value blocks referenced by testLookup.
var testValues = [832]uint32{
	0x000c: 0x00000001,
	0x007f: 0x00000002,
	0x00c0: 0x00000003,
	0x0100: 0x00000004,
	0x0140: 0x0000000c, 0x0141: 0x0000000d, 0x0142: 0x0000000e,
	0x0150: 0x0000000f,
	0x0155: 0x00000010,
	0x01bf: 0x00000005,
	0x01c0: 0x00000006,
	0x0219: 0x00000007,
	0x027f: 0x00000008,
	0x0280: 0x00000009,
	0x02c1: 0x0000000a,
	0x033f: 0x0000000b,
}

// testLookup holds the index blocks mapping UTF-8 lead/continuation bytes to
// value-block offsets.
var testLookup = [640]uint16{
	0x0e0: 0x05, 0x0e6: 0x06,
	0x13f: 0x07,
	0x140: 0x08, 0x144: 0x09,
	0x190: 0x03,
	0x1ff: 0x0a,
	0x20f: 0x05,
	0x242: 0x01, 0x244: 0x02,
	0x248: 0x03,
	0x25f: 0x04,
	0x260: 0x01,
	0x26f: 0x02,
	0x270: 0x04, 0x274: 0x06,
}

// testTrie wires the fixture blocks into a lookupable Trie.
var testTrie = Trie{testLookup[6*blockSize:], testValues[:], testLookup[:], testValues[:]}

View file

@ -1,42 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package colltab
// testWeighter is a simple Weighter that returns weights from a user-defined map.
type testWeighter map[string][]Elem

// The position-dependent Weighter methods are no-ops for the test map.
func (t testWeighter) Start(int, []byte) int       { return 0 }
func (t testWeighter) StartString(int, string) int { return 0 }
func (t testWeighter) Domain() []string            { return nil }
func (t testWeighter) Top() uint32                 { return 0 }

// maxContractBytes is the maximum length of any key in the map.
const maxContractBytes = 10
// AppendNext finds the longest prefix of s (at most maxContractBytes) that
// is a key in the map, appends its elements to buf, and reports the number
// of bytes matched. It panics if no prefix of s is present.
func (t testWeighter) AppendNext(buf []Elem, s []byte) ([]Elem, int) {
	limit := len(s)
	if limit > maxContractBytes {
		limit = maxContractBytes
	}
	for n := limit; n > 0; n-- {
		if elems, ok := t[string(s[:n])]; ok {
			return append(buf, elems...), n
		}
	}
	panic("incomplete testWeighter: could not find " + string(s))
}
// AppendNextString is the string variant of AppendNext: longest-prefix map
// lookup, capped at maxContractBytes, panicking on a miss.
func (t testWeighter) AppendNextString(buf []Elem, s string) ([]Elem, int) {
	limit := len(s)
	if limit > maxContractBytes {
		limit = maxContractBytes
	}
	for n := limit; n > 0; n-- {
		if elems, ok := t[s[:n]]; ok {
			return append(buf, elems...), n
		}
	}
	panic("incomplete testWeighter: could not find " + s)
}

View file

@ -55,18 +55,36 @@ func (w *CodeWriter) WriteGoFile(filename, pkg string) {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer f.Close()
if _, err = w.WriteGo(f, pkg); err != nil {
if _, err = w.WriteGo(f, pkg, ""); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// WriteVersionedGoFile appends the buffer with the total size of all created
// structures and writes it as a Go file to the given file with the given
// package name and build tags for the current Unicode version.
func (w *CodeWriter) WriteVersionedGoFile(filename, pkg string) {
	tags := buildTags()
	// Version the filename only when build tags apply; an untagged file
	// keeps its plain name.
	if tags != "" {
		filename = insertVersion(filename, UnicodeVersion())
	}
	f, err := os.Create(filename)
	if err != nil {
		log.Fatalf("Could not create file %s: %v", filename, err)
	}
	defer f.Close()
	if _, err = w.WriteGo(f, pkg, tags); err != nil {
		log.Fatalf("Error writing file %s: %v", filename, err)
	}
}
// WriteGo appends the buffer with the total size of all created structures and
// writes it as a Go file to the given writer with the given package name.
func (w *CodeWriter) WriteGo(out io.Writer, pkg string) (n int, err error) {
func (w *CodeWriter) WriteGo(out io.Writer, pkg, tags string) (n int, err error) {
sz := w.Size
w.WriteComment("Total table size %d bytes (%dKiB); checksum: %X\n", sz, sz/1024, w.Hash.Sum32())
defer w.buf.Reset()
return WriteGo(out, pkg, w.buf.Bytes())
return WriteGo(out, pkg, tags, w.buf.Bytes())
}
func (w *CodeWriter) printf(f string, x ...interface{}) {

View file

@ -31,6 +31,7 @@ import (
"os"
"path"
"path/filepath"
"strings"
"sync"
"unicode"
@ -69,8 +70,6 @@ func Init() {
const header = `// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package %s
`
// UnicodeVersion reports the requested Unicode version.
@ -78,11 +77,33 @@ func UnicodeVersion() string {
return *unicodeVersion
}
// UnicodeVersion reports the requested CLDR version.
// CLDRVersion reports the requested CLDR version.
func CLDRVersion() string {
return *cldrVersion
}
var tags = []struct{ version, buildTags string }{
{"10.0.0", "go1.10"},
{"", "!go1.10"},
}
// buildTags reports the build tags used for the current Unicode version.
func buildTags() string {
	v := UnicodeVersion()
	for _, x := range tags {
		// We should do a numeric comparison, but including the collate package
		// would create an import cycle. We approximate it by assuming that
		// longer version strings are later: a strictly shorter tag version is
		// older than v, and equal-length versions are compared lexically.
		// (The length check must be strict; a non-strict check would return
		// on equal lengths before ever reaching the lexical comparison.)
		if len(x.version) < len(v) {
			return x.buildTags
		}
		if len(x.version) == len(v) && x.version <= v {
			return x.buildTags
		}
	}
	return tags[0].buildTags
}
// IsLocal reports whether data files are available locally.
func IsLocal() bool {
dir, err := localReadmeFile()
@ -243,15 +264,46 @@ func WriteGoFile(filename, pkg string, b []byte) {
log.Fatalf("Could not create file %s: %v", filename, err)
}
defer w.Close()
if _, err = WriteGo(w, pkg, b); err != nil {
if _, err = WriteGo(w, pkg, "", b); err != nil {
log.Fatalf("Error writing file %s: %v", filename, err)
}
}
// insertVersion returns filename with version spliced in immediately
// before its ".go" (or "_test.go") suffix, e.g. "tables.go" with version
// "10.0.0" becomes "tables10.0.0.go".
func insertVersion(filename, version string) string {
	var suffix string
	if strings.HasSuffix(filename, "_test.go") {
		suffix = "_test.go"
	} else {
		suffix = ".go"
	}
	stem := filename[:len(filename)-len(suffix)]
	return fmt.Sprintf("%s%s%s", stem, version, suffix)
}
// WriteVersionedGoFile prepends a standard file comment, adds build tags to
// version the file for the current Unicode version, and package statement to
// the given bytes, applies gofmt, and writes them to a file with the given
// name. It will call log.Fatal if there are any errors.
func WriteVersionedGoFile(filename, pkg string, b []byte) {
	tags := buildTags()
	if tags != "" {
		filename = insertVersion(filename, UnicodeVersion())
	}
	writeGoFileWithTags(filename, pkg, tags, b)
}

// writeGoFileWithTags creates filename and writes the formatted Go source
// for pkg with the given build tags, exiting via log.Fatalf on any error.
func writeGoFileWithTags(filename, pkg, tags string, b []byte) {
	out, err := os.Create(filename)
	if err != nil {
		log.Fatalf("Could not create file %s: %v", filename, err)
	}
	defer out.Close()
	if _, err = WriteGo(out, pkg, tags, b); err != nil {
		log.Fatalf("Error writing file %s: %v", filename, err)
	}
}
// WriteGo prepends a standard file comment and package statement to the given
// bytes, applies gofmt, and writes them to w.
func WriteGo(w io.Writer, pkg string, b []byte) (n int, err error) {
src := []byte(fmt.Sprintf(header, pkg))
func WriteGo(w io.Writer, pkg, tags string, b []byte) (n int, err error) {
src := []byte(header)
if tags != "" {
src = append(src, fmt.Sprintf("// +build %s\n\n", tags)...)
}
src = append(src, fmt.Sprintf("package %s\n\n", pkg)...)
src = append(src, b...)
formatted, err := format.Source(src)
if err != nil {

View file

@ -1,38 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package internal
import (
"testing"
"golang.org/x/text/language"
)
// TestParents verifies that the generated Parent table maps each tag's
// compact index to the compact index of that tag's expected parent.
func TestParents(t *testing.T) {
	testCases := []struct {
		tag, parent string
	}{
		{"af", "und"},
		{"en", "und"},
		{"en-001", "en"},
		{"en-AU", "en-001"},
		{"en-US", "en"},
		{"en-US-u-va-posix", "en-US"},
		{"ca-ES-valencia", "ca-ES"},
	}
	for _, tc := range testCases {
		tag, ok := language.CompactIndex(language.MustParse(tc.tag))
		if !ok {
			// Fixed message: the original said "flag" where it meant "tag".
			t.Fatalf("Could not get index of tag %s", tc.tag)
		}
		want, ok := language.CompactIndex(language.MustParse(tc.parent))
		if !ok {
			t.Fatalf("Could not get index of parent %s of tag %s", tc.parent, tc.tag)
		}
		if got := int(Parent[tag]); got != want {
			t.Errorf("Parent[%s] = %d; want %d (%s)", tc.tag, got, want, tc.parent)
		}
	}
}

View file

@ -1,38 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package internal
import (
"fmt"
"strings"
"testing"
"golang.org/x/text/language"
)
// TestUnique checks that UniqueTags removes duplicate tags; the expected
// strings show the result in sorted order.
func TestUnique(t *testing.T) {
	for _, tt := range []struct {
		in, want string
	}{
		{"", "[]"},
		{"en", "[en]"},
		{"en en", "[en]"},
		{"en en en", "[en]"},
		{"en-u-cu-eur en", "[en en-u-cu-eur]"},
		{"nl en", "[en nl]"},
		{"pt-Pt pt", "[pt pt-PT]"},
	} {
		input := []language.Tag{}
		for _, s := range strings.Split(tt.in, " ") {
			if s == "" {
				continue
			}
			input = append(input, language.MustParse(s))
		}
		got := fmt.Sprint(UniqueTags(input))
		if got != tt.want {
			t.Errorf("Unique(%s) = %s; want %s", tt.in, got, tt.want)
		}
	}
}

View file

@ -1,56 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package internal
import (
"strings"
"testing"
"golang.org/x/text/language"
)
// TestInheritanceMatcher exercises NewInheritanceMatcher: each case lists
// the supported tags, the requested tags, and the expected matched tag
// with its confidence.
func TestInheritanceMatcher(t *testing.T) {
	for i, tt := range []struct {
		haveTags string
		wantTags string
		match    string
		conf     language.Confidence
	}{
		{"und,en,en-US", "en-US", "en-US", language.Exact}, // most specific match
		{"zh-Hant,zh", "zh-TW", "zh-Hant", language.High},  // zh-TW implies Hant.
		{"und,zh", "zh-TW", "und", language.High},          // zh-TW does not match zh.
		{"zh", "zh-TW", "und", language.No},                // zh-TW does not match zh.
		{"iw,en,nl", "he", "he", language.Exact},           // matches after canonicalization
		{"he,en,nl", "iw", "he", language.Exact},           // matches after canonicalization
		// Prefer first match over more specific match for various reasons:
		// a) consistency of user interface is more important than an exact match,
		// b) _if_ und is specified, it should be considered a correct and useful match,
		// Note that a call to this Match will almost always be with a single tag.
		{"und,en,en-US", "he,en-US", "und", language.High},
	} {
		have := parseTags(tt.haveTags)
		m := NewInheritanceMatcher(have)
		tag, index, conf := m.Match(parseTags(tt.wantTags)...)
		want := language.Raw.Make(tt.match)
		if tag != want {
			t.Errorf("%d:tag: got %q; want %q", i, tag, want)
		}
		// index is only checked when something matched; it points into have.
		if conf != language.No {
			if got, _ := language.All.Canonicalize(have[index]); got != want {
				t.Errorf("%d:index: got %q; want %q ", i, got, want)
			}
		}
		if conf != tt.conf {
			t.Errorf("%d:conf: got %v; want %v", i, conf, tt.conf)
		}
	}
}
// parseTags converts a comma-separated list of tag strings into raw
// (uncanonicalized) language.Tags, trimming surrounding whitespace.
func parseTags(list string) []language.Tag {
	parts := strings.Split(list, ",")
	result := make([]language.Tag, 0, len(parts))
	for _, p := range parts {
		result = append(result, language.Raw.Make(strings.TrimSpace(p)))
	}
	return result
}

View file

@ -4,9 +4,9 @@ package internal
// Parent maps a compact index of a tag to the compact index of the parent of
// this tag.
var Parent = []uint16{ // 754 elements
var Parent = []uint16{ // 768 elements
// Entry 0 - 3F
0x0000, 0x0053, 0x00e5, 0x0000, 0x0003, 0x0003, 0x0000, 0x0006,
0x0000, 0x0053, 0x00e8, 0x0000, 0x0003, 0x0003, 0x0000, 0x0006,
0x0000, 0x0008, 0x0000, 0x000a, 0x0000, 0x000c, 0x000c, 0x000c,
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c,
@ -18,100 +18,101 @@ var Parent = []uint16{ // 754 elements
0x0000, 0x0040, 0x0000, 0x0042, 0x0042, 0x0000, 0x0045, 0x0045,
0x0000, 0x0048, 0x0000, 0x004a, 0x0000, 0x0000, 0x004d, 0x004c,
0x004c, 0x0000, 0x0051, 0x0051, 0x0051, 0x0051, 0x0000, 0x0056,
0x0000, 0x0058, 0x0000, 0x005a, 0x0000, 0x005c, 0x005c, 0x0000,
0x005f, 0x0000, 0x0061, 0x0000, 0x0063, 0x0000, 0x0065, 0x0065,
0x0000, 0x0068, 0x0000, 0x006a, 0x006a, 0x006a, 0x006a, 0x006a,
0x006a, 0x006a, 0x0000, 0x0072, 0x0000, 0x0074, 0x0000, 0x0076,
0x0000, 0x0000, 0x0079, 0x0000, 0x007b, 0x0000, 0x007d, 0x0000,
0x0056, 0x0000, 0x0059, 0x0000, 0x005b, 0x0000, 0x005d, 0x0000,
0x005f, 0x005f, 0x0000, 0x0062, 0x0000, 0x0064, 0x0000, 0x0066,
0x0000, 0x0068, 0x0068, 0x0000, 0x006b, 0x0000, 0x006d, 0x006d,
0x006d, 0x006d, 0x006d, 0x006d, 0x006d, 0x0000, 0x0075, 0x0000,
0x0077, 0x0000, 0x0079, 0x0000, 0x0000, 0x007c, 0x0000, 0x007e,
// Entry 80 - BF
0x007f, 0x007f, 0x0000, 0x0082, 0x0082, 0x0000, 0x0085, 0x0086,
0x0086, 0x0086, 0x0085, 0x0087, 0x0086, 0x0086, 0x0086, 0x0085,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086,
0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0087, 0x0086, 0x0086,
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0086, 0x0085, 0x0086,
0x0000, 0x0080, 0x0000, 0x0082, 0x0082, 0x0000, 0x0085, 0x0085,
0x0000, 0x0088, 0x0089, 0x0089, 0x0089, 0x0088, 0x008a, 0x0089,
0x0089, 0x0089, 0x0088, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x008a, 0x0089, 0x0089, 0x0089, 0x0089, 0x008a, 0x0089,
0x008a, 0x0089, 0x0089, 0x008a, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0088, 0x0089, 0x0089,
0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0088,
// Entry C0 - FF
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0085,
0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0087, 0x0086, 0x0086,
0x0087, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086,
0x0086, 0x0086, 0x0086, 0x0086, 0x0085, 0x0085, 0x0086, 0x0086,
0x0085, 0x0086, 0x0086, 0x0086, 0x0086, 0x0086, 0x0000, 0x00ee,
0x0000, 0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1,
0x00f1, 0x00f1, 0x00f1, 0x00f0, 0x00f1, 0x00f0, 0x00f0, 0x00f1,
0x0089, 0x0088, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0089, 0x008a, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0089, 0x0088, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089,
0x008a, 0x0089, 0x0089, 0x008a, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0089, 0x0088,
0x0088, 0x0089, 0x0089, 0x0088, 0x0089, 0x0089, 0x0089, 0x0089,
0x0089, 0x0000, 0x00f1, 0x0000, 0x00f3, 0x00f4, 0x00f4, 0x00f4,
0x00f4, 0x00f4, 0x00f4, 0x00f4, 0x00f4, 0x00f4, 0x00f3, 0x00f4,
// Entry 100 - 13F
0x00f1, 0x00f0, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f0, 0x00f1,
0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x00f1, 0x0000, 0x010d, 0x0000,
0x010f, 0x0000, 0x0111, 0x0000, 0x0113, 0x0113, 0x0000, 0x0116,
0x0116, 0x0116, 0x0116, 0x0000, 0x011b, 0x0000, 0x011d, 0x0000,
0x011f, 0x011f, 0x0000, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x00f3, 0x00f3, 0x00f4, 0x00f4, 0x00f3, 0x00f4, 0x00f4, 0x00f4,
0x00f4, 0x00f3, 0x00f4, 0x00f4, 0x00f4, 0x00f4, 0x00f4, 0x00f4,
0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0000, 0x0116,
0x0116, 0x0000, 0x0119, 0x0119, 0x0119, 0x0119, 0x0000, 0x011e,
0x0000, 0x0120, 0x0000, 0x0122, 0x0122, 0x0000, 0x0125, 0x0125,
0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125,
0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125,
0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125,
// Entry 140 - 17F
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122, 0x0122,
0x0122, 0x0000, 0x0151, 0x0000, 0x0153, 0x0000, 0x0155, 0x0000,
0x0157, 0x0000, 0x0159, 0x0000, 0x015b, 0x015b, 0x015b, 0x0000,
0x015f, 0x0000, 0x0000, 0x0162, 0x0000, 0x0164, 0x0000, 0x0166,
0x0166, 0x0166, 0x0000, 0x016a, 0x0000, 0x016c, 0x0000, 0x016e,
0x0000, 0x0170, 0x0170, 0x0000, 0x0173, 0x0000, 0x0175, 0x0000,
0x0177, 0x0000, 0x0179, 0x0000, 0x017b, 0x0000, 0x017d, 0x0000,
0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125,
0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125, 0x0125,
0x0125, 0x0125, 0x0125, 0x0125, 0x0000, 0x0154, 0x0000, 0x0156,
0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x0000, 0x015e,
0x015e, 0x015e, 0x0000, 0x0162, 0x0000, 0x0000, 0x0165, 0x0000,
0x0167, 0x0000, 0x0169, 0x0169, 0x0169, 0x0000, 0x016d, 0x0000,
0x016f, 0x0000, 0x0171, 0x0000, 0x0173, 0x0173, 0x0000, 0x0176,
0x0000, 0x0178, 0x0000, 0x017a, 0x0000, 0x017c, 0x0000, 0x017e,
// Entry 180 - 1BF
0x017f, 0x0000, 0x0181, 0x0181, 0x0181, 0x0181, 0x0000, 0x0000,
0x0187, 0x0000, 0x0000, 0x018a, 0x0000, 0x018c, 0x0000, 0x0000,
0x018f, 0x0000, 0x0191, 0x0000, 0x0000, 0x0194, 0x0000, 0x0000,
0x0197, 0x0000, 0x0199, 0x0000, 0x019b, 0x0000, 0x019d, 0x0000,
0x0000, 0x0180, 0x0000, 0x0000, 0x0183, 0x0000, 0x0185, 0x0185,
0x0185, 0x0185, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x018e,
0x0000, 0x0190, 0x0000, 0x0000, 0x0193, 0x0000, 0x0195, 0x0000,
0x0000, 0x0198, 0x0000, 0x0000, 0x019b, 0x0000, 0x019d, 0x0000,
0x019f, 0x0000, 0x01a1, 0x0000, 0x01a3, 0x0000, 0x01a5, 0x0000,
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x01ab, 0x0000, 0x01ae,
0x0000, 0x01b0, 0x0000, 0x01b2, 0x0000, 0x01b4, 0x0000, 0x01b6,
0x0000, 0x0000, 0x01b9, 0x0000, 0x01bb, 0x0000, 0x01bd, 0x0000,
0x01a7, 0x0000, 0x01a9, 0x0000, 0x01ab, 0x0000, 0x01ad, 0x0000,
0x01af, 0x01af, 0x0000, 0x01b2, 0x0000, 0x01b4, 0x0000, 0x01b6,
0x0000, 0x01b8, 0x0000, 0x01ba, 0x0000, 0x0000, 0x01bd, 0x0000,
// Entry 1C0 - 1FF
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x01c5,
0x01c5, 0x01c5, 0x0000, 0x01ca, 0x0000, 0x01cc, 0x01cc, 0x0000,
0x01cf, 0x0000, 0x01d1, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000,
0x01d7, 0x0000, 0x01d9, 0x01d9, 0x0000, 0x01dc, 0x0000, 0x01de,
0x01bf, 0x0000, 0x01c1, 0x0000, 0x01c3, 0x0000, 0x01c5, 0x0000,
0x01c7, 0x0000, 0x01c9, 0x01c9, 0x01c9, 0x01c9, 0x0000, 0x01ce,
0x0000, 0x01d0, 0x01d0, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000,
0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x0000, 0x01dd, 0x01dd,
0x0000, 0x01e0, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
0x01ee, 0x01ee, 0x0000, 0x01f2, 0x0000, 0x01f4, 0x0000, 0x01f6,
0x0000, 0x01f8, 0x0000, 0x0000, 0x01fb, 0x0000, 0x01fd, 0x01fd,
0x0000, 0x01f0, 0x0000, 0x01f2, 0x01f2, 0x01f2, 0x0000, 0x01f6,
0x0000, 0x01f8, 0x0000, 0x01fa, 0x0000, 0x01fc, 0x0000, 0x0000,
// Entry 200 - 23F
0x0000, 0x0200, 0x0000, 0x0202, 0x0202, 0x0000, 0x0205, 0x0205,
0x0000, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208, 0x0208,
0x0000, 0x0210, 0x0000, 0x0212, 0x0000, 0x0214, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x021a, 0x0000, 0x0000, 0x021d, 0x0000,
0x021f, 0x021f, 0x0000, 0x0222, 0x0000, 0x0224, 0x0224, 0x0000,
0x0000, 0x0228, 0x0227, 0x0227, 0x0000, 0x0000, 0x022d, 0x0000,
0x022f, 0x0000, 0x0231, 0x0000, 0x023d, 0x0233, 0x023d, 0x023d,
0x023d, 0x023d, 0x023d, 0x023d, 0x023d, 0x0233, 0x023d, 0x023d,
0x01ff, 0x0000, 0x0201, 0x0201, 0x0000, 0x0204, 0x0000, 0x0206,
0x0206, 0x0000, 0x0209, 0x0209, 0x0000, 0x020c, 0x020c, 0x020c,
0x020c, 0x020c, 0x020c, 0x020c, 0x0000, 0x0214, 0x0000, 0x0216,
0x0000, 0x0218, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x021e,
0x0000, 0x0000, 0x0221, 0x0000, 0x0223, 0x0223, 0x0000, 0x0226,
0x0000, 0x0228, 0x0228, 0x0000, 0x0000, 0x022c, 0x022b, 0x022b,
0x0000, 0x0000, 0x0231, 0x0000, 0x0233, 0x0000, 0x0235, 0x0000,
0x0241, 0x0237, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241, 0x0241,
// Entry 240 - 27F
0x0000, 0x0240, 0x0240, 0x0240, 0x0000, 0x0244, 0x0000, 0x0246,
0x0000, 0x0248, 0x0248, 0x0000, 0x024b, 0x0000, 0x024d, 0x024d,
0x024d, 0x024d, 0x024d, 0x024d, 0x0000, 0x0254, 0x0000, 0x0256,
0x0000, 0x0258, 0x0000, 0x025a, 0x0000, 0x025c, 0x0000, 0x0000,
0x025f, 0x025f, 0x025f, 0x0000, 0x0263, 0x0000, 0x0265, 0x0000,
0x0267, 0x0000, 0x0000, 0x026a, 0x0269, 0x0269, 0x0000, 0x026e,
0x0000, 0x0270, 0x0000, 0x0272, 0x0000, 0x0000, 0x0000, 0x0000,
0x0277, 0x0000, 0x0000, 0x027a, 0x0000, 0x027c, 0x027c, 0x027c,
0x0241, 0x0237, 0x0241, 0x0241, 0x0000, 0x0244, 0x0244, 0x0244,
0x0000, 0x0248, 0x0000, 0x024a, 0x0000, 0x024c, 0x024c, 0x0000,
0x024f, 0x0000, 0x0251, 0x0251, 0x0251, 0x0251, 0x0251, 0x0251,
0x0000, 0x0258, 0x0000, 0x025a, 0x0000, 0x025c, 0x0000, 0x025e,
0x0000, 0x0260, 0x0000, 0x0262, 0x0000, 0x0000, 0x0265, 0x0265,
0x0265, 0x0000, 0x0269, 0x0000, 0x026b, 0x0000, 0x026d, 0x0000,
0x0000, 0x0270, 0x026f, 0x026f, 0x0000, 0x0274, 0x0000, 0x0276,
0x0000, 0x0278, 0x0000, 0x0000, 0x0000, 0x0000, 0x027d, 0x0000,
// Entry 280 - 2BF
0x027c, 0x0000, 0x0281, 0x0281, 0x0281, 0x0000, 0x0285, 0x0285,
0x0285, 0x0285, 0x0285, 0x0000, 0x028b, 0x028b, 0x028b, 0x028b,
0x0000, 0x0000, 0x0000, 0x0000, 0x0293, 0x0293, 0x0293, 0x0000,
0x0297, 0x0297, 0x0297, 0x0297, 0x0000, 0x0000, 0x029d, 0x029d,
0x029d, 0x029d, 0x0000, 0x02a2, 0x0000, 0x02a4, 0x02a4, 0x0000,
0x02a7, 0x0000, 0x02a9, 0x02a9, 0x0000, 0x0000, 0x02ad, 0x0000,
0x0000, 0x02b0, 0x0000, 0x02b2, 0x02b2, 0x0000, 0x0000, 0x02b6,
0x0000, 0x02b8, 0x0000, 0x02ba, 0x0000, 0x02bc, 0x0000, 0x02be,
0x0000, 0x0280, 0x0000, 0x0282, 0x0282, 0x0282, 0x0282, 0x0000,
0x0287, 0x0287, 0x0287, 0x0000, 0x028b, 0x028b, 0x028b, 0x028b,
0x028b, 0x0000, 0x0291, 0x0291, 0x0291, 0x0291, 0x0000, 0x0000,
0x0000, 0x0000, 0x0299, 0x0299, 0x0299, 0x0000, 0x029d, 0x029d,
0x029d, 0x029d, 0x0000, 0x0000, 0x02a3, 0x02a3, 0x02a3, 0x02a3,
0x0000, 0x02a8, 0x0000, 0x02aa, 0x02aa, 0x0000, 0x02ad, 0x0000,
0x02af, 0x0000, 0x02b1, 0x02b1, 0x0000, 0x0000, 0x02b5, 0x0000,
0x0000, 0x02b8, 0x0000, 0x02ba, 0x02ba, 0x0000, 0x0000, 0x02be,
// Entry 2C0 - 2FF
0x02be, 0x0000, 0x0000, 0x02c2, 0x0000, 0x02c4, 0x02c1, 0x02c1,
0x0000, 0x0000, 0x02c9, 0x02c8, 0x02c8, 0x0000, 0x0000, 0x02ce,
0x0000, 0x02d0, 0x0000, 0x02d2, 0x0000, 0x0000, 0x02d5, 0x0000,
0x0000, 0x0000, 0x02d9, 0x0000, 0x02db, 0x0000, 0x02dd, 0x0000,
0x02df, 0x02df, 0x0000, 0x02e2, 0x0000, 0x02e4, 0x0000, 0x02e6,
0x02e6, 0x02e6, 0x02e6, 0x02e6, 0x0000, 0x02ec, 0x02ed, 0x02ec,
0x0000, 0x02f0,
} // Size: 1532 bytes
0x0000, 0x02c0, 0x0000, 0x02c2, 0x0000, 0x02c4, 0x0000, 0x02c6,
0x0000, 0x02c8, 0x02c8, 0x0000, 0x0000, 0x02cc, 0x0000, 0x02ce,
0x02cb, 0x02cb, 0x0000, 0x0000, 0x02d3, 0x02d2, 0x02d2, 0x0000,
0x0000, 0x02d8, 0x0000, 0x02da, 0x0000, 0x02dc, 0x0000, 0x0000,
0x02df, 0x0000, 0x02e1, 0x0000, 0x0000, 0x02e4, 0x0000, 0x02e6,
0x0000, 0x02e8, 0x0000, 0x02ea, 0x02ea, 0x0000, 0x0000, 0x02ee,
0x02ed, 0x02ed, 0x0000, 0x02f2, 0x0000, 0x02f4, 0x02f4, 0x02f4,
0x02f4, 0x02f4, 0x0000, 0x02fa, 0x02fb, 0x02fa, 0x0000, 0x02fe,
} // Size: 1560 bytes
// Total table size 1532 bytes (1KiB); checksum: 90718A2
// Total table size 1560 bytes (1KiB); checksum: 4897681C

View file

@ -1,67 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package tag
import (
"strings"
"testing"
)
// strdata holds the entries that are concatenated to build the Index
// under test. NOTE(review): entries look space-padded to a fixed width —
// confirm padding survives; the rendering may have trimmed trailing spaces.
var strdata = []string{
	"aa ",
	"aaa ",
	"aaaa",
	"aaab",
	"aab ",
	"ab ",
	"ba ",
	"xxxx",
	"\xff\xff\xff\xff",
}

// testCases maps a query string to the index expected from Index.Index;
// -1 means the query must not be found.
var testCases = map[string]int{
	"a":    0,
	"aa":   0,
	"aaa":  1,
	"aa ":  0,
	"aaaa": 2,
	"aaab": 3,
	"b":    6,
	"ba":   6,
	" ":    -1,
	"aaax": -1,
	"bbbb": -1,
	"zzzz": -1,
}
// TestIndex builds an Index from the concatenated strdata and verifies
// that each query in testCases resolves to its expected offset.
func TestIndex(t *testing.T) {
	idx := Index(strings.Join(strdata, ""))
	for input, want := range testCases {
		got := idx.Index([]byte(input))
		if got != want {
			t.Errorf("%s: got %d; want %d", input, got, want)
		}
	}
}
// TestFixCase runs FixCase over (pattern, input, expected) triples; an
// empty expected string means FixCase must report failure.
func TestFixCase(t *testing.T) {
	tests := []string{
		"aaaa", "AbCD", "abcd",
		"Zzzz", "AbCD", "Abcd",
		"Zzzz", "AbC", "",
		"XXX", "ab ", "",
		"XXX", "usd", "USD",
		"cmn", "AB ", "",
		"gsw", "CMN", "cmn",
	}
	for i := 0; i+3 <= len(tests); i += 3 {
		pattern, input, want := tests[i], tests[i+1], tests[i+2]
		b := []byte(input)
		if !FixCase(pattern, b) {
			b = nil
		}
		if string(b) != want {
			t.Errorf("FixCase(%q, %q) = %q; want %q", pattern, input, b, want)
		}
	}
}

View file

@ -1,875 +0,0 @@
// This file is generated with "go test -tags generate". DO NOT EDIT!
// +build !generate
package triegen_test
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *randTrie) lookup(s []byte) (v uint8, sz int) {
	// Generated decoder: the trie is keyed on the UTF-8 bytes themselves.
	// Each leading byte selects an index block; each continuation byte's
	// low 6 bits step to the next block (or, at the final byte, the value).
	c0 := s[0]
	switch {
	case c0 < 0x80: // is ASCII
		return randValues[c0], 1
	case c0 < 0xC2:
		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
	case c0 < 0xE0: // 2-byte UTF-8
		if len(s) < 2 {
			return 0, 0
		}
		i := randIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c1), 2
	case c0 < 0xF0: // 3-byte UTF-8
		if len(s) < 3 {
			return 0, 0
		}
		i := randIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		o := uint32(i)<<6 + uint32(c1)
		i = randIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return 0, 2 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c2), 3
	case c0 < 0xF8: // 4-byte UTF-8
		if len(s) < 4 {
			return 0, 0
		}
		i := randIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		o := uint32(i)<<6 + uint32(c1)
		i = randIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return 0, 2 // Illegal UTF-8: not a continuation byte.
		}
		o = uint32(i)<<6 + uint32(c2)
		i = randIndex[o]
		c3 := s[3]
		if c3 < 0x80 || 0xC0 <= c3 {
			return 0, 3 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c3), 4
	}
	// Illegal rune
	return 0, 1
}
// lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
// s must start with a full and valid UTF-8 encoded rune.
func (t *randTrie) lookupUnsafe(s []byte) uint8 {
	// No validation: the caller guarantees well-formed input, so the
	// continuation bytes are consumed without range checks.
	c0 := s[0]
	if c0 < 0x80 { // is ASCII
		return randValues[c0]
	}
	i := randIndex[c0]
	if c0 < 0xE0 { // 2-byte UTF-8
		return t.lookupValue(uint32(i), s[1])
	}
	i = randIndex[uint32(i)<<6+uint32(s[1])]
	if c0 < 0xF0 { // 3-byte UTF-8
		return t.lookupValue(uint32(i), s[2])
	}
	i = randIndex[uint32(i)<<6+uint32(s[2])]
	if c0 < 0xF8 { // 4-byte UTF-8
		return t.lookupValue(uint32(i), s[3])
	}
	// Lead bytes >= 0xF8 cannot occur in valid UTF-8.
	return 0
}
// lookupString returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *randTrie) lookupString(s string) (v uint8, sz int) {
	// String twin of lookup: identical logic, indexing a string instead
	// of a []byte to avoid a conversion allocation in callers.
	c0 := s[0]
	switch {
	case c0 < 0x80: // is ASCII
		return randValues[c0], 1
	case c0 < 0xC2:
		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
	case c0 < 0xE0: // 2-byte UTF-8
		if len(s) < 2 {
			return 0, 0
		}
		i := randIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c1), 2
	case c0 < 0xF0: // 3-byte UTF-8
		if len(s) < 3 {
			return 0, 0
		}
		i := randIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		o := uint32(i)<<6 + uint32(c1)
		i = randIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return 0, 2 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c2), 3
	case c0 < 0xF8: // 4-byte UTF-8
		if len(s) < 4 {
			return 0, 0
		}
		i := randIndex[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		o := uint32(i)<<6 + uint32(c1)
		i = randIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return 0, 2 // Illegal UTF-8: not a continuation byte.
		}
		o = uint32(i)<<6 + uint32(c2)
		i = randIndex[o]
		c3 := s[3]
		if c3 < 0x80 || 0xC0 <= c3 {
			return 0, 3 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c3), 4
	}
	// Illegal rune
	return 0, 1
}
// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
// s must start with a full and valid UTF-8 encoded rune.
func (t *randTrie) lookupStringUnsafe(s string) uint8 {
	// No validation: the caller guarantees well-formed input, so the
	// continuation bytes are consumed without range checks.
	c0 := s[0]
	if c0 < 0x80 { // is ASCII
		return randValues[c0]
	}
	i := randIndex[c0]
	if c0 < 0xE0 { // 2-byte UTF-8
		return t.lookupValue(uint32(i), s[1])
	}
	i = randIndex[uint32(i)<<6+uint32(s[1])]
	if c0 < 0xF0 { // 3-byte UTF-8
		return t.lookupValue(uint32(i), s[2])
	}
	i = randIndex[uint32(i)<<6+uint32(s[2])]
	if c0 < 0xF8 { // 4-byte UTF-8
		return t.lookupValue(uint32(i), s[3])
	}
	// Lead bytes >= 0xF8 cannot occur in valid UTF-8.
	return 0
}
// randTrie. Total size: 9280 bytes (9.06 KiB). Checksum: 6debd324a8debb8f.
// The struct is stateless; all data lives in the package-level tables.
type randTrie struct{}

// newRandTrie returns a randTrie. The argument i is unused here —
// presumably kept for signature parity with other generated tries;
// NOTE(review): confirm against the triegen generator.
func newRandTrie(i int) *randTrie {
	return &randTrie{}
}
// lookupValue determines the type of block n and looks up the value for b.
// This generated trie has only the one (default) block type, so the switch
// always falls through to a direct table read: n<<6+b addresses entry b
// within the 64-entry block n of randValues.
func (t *randTrie) lookupValue(n uint32, b byte) uint8 {
	switch {
	default:
		return uint8(randValues[n<<6+uint32(b)])
	}
}
// randValues: 56 blocks, 3584 entries, 3584 bytes
// The third block is the zero block.
var randValues = [3584]uint8{
// Block 0x0, offset 0x0
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0xc9: 0x0001,
// Block 0x4, offset 0x100
0x100: 0x0001,
// Block 0x5, offset 0x140
0x155: 0x0001,
// Block 0x6, offset 0x180
0x196: 0x0001,
// Block 0x7, offset 0x1c0
0x1ef: 0x0001,
// Block 0x8, offset 0x200
0x206: 0x0001,
// Block 0x9, offset 0x240
0x258: 0x0001,
// Block 0xa, offset 0x280
0x288: 0x0001,
// Block 0xb, offset 0x2c0
0x2f2: 0x0001,
// Block 0xc, offset 0x300
0x304: 0x0001,
// Block 0xd, offset 0x340
0x34b: 0x0001,
// Block 0xe, offset 0x380
0x3ba: 0x0001,
// Block 0xf, offset 0x3c0
0x3f5: 0x0001,
// Block 0x10, offset 0x400
0x41d: 0x0001,
// Block 0x11, offset 0x440
0x442: 0x0001,
// Block 0x12, offset 0x480
0x4bb: 0x0001,
// Block 0x13, offset 0x4c0
0x4e9: 0x0001,
// Block 0x14, offset 0x500
0x53e: 0x0001,
// Block 0x15, offset 0x540
0x55f: 0x0001,
// Block 0x16, offset 0x580
0x5b7: 0x0001,
// Block 0x17, offset 0x5c0
0x5d9: 0x0001,
// Block 0x18, offset 0x600
0x60e: 0x0001,
// Block 0x19, offset 0x640
0x652: 0x0001,
// Block 0x1a, offset 0x680
0x68f: 0x0001,
// Block 0x1b, offset 0x6c0
0x6dc: 0x0001,
// Block 0x1c, offset 0x700
0x703: 0x0001,
// Block 0x1d, offset 0x740
0x741: 0x0001,
// Block 0x1e, offset 0x780
0x79b: 0x0001,
// Block 0x1f, offset 0x7c0
0x7f1: 0x0001,
// Block 0x20, offset 0x800
0x833: 0x0001,
// Block 0x21, offset 0x840
0x853: 0x0001,
// Block 0x22, offset 0x880
0x8a2: 0x0001,
// Block 0x23, offset 0x8c0
0x8f8: 0x0001,
// Block 0x24, offset 0x900
0x917: 0x0001,
// Block 0x25, offset 0x940
0x945: 0x0001,
// Block 0x26, offset 0x980
0x99e: 0x0001,
// Block 0x27, offset 0x9c0
0x9fd: 0x0001,
// Block 0x28, offset 0xa00
0xa0d: 0x0001,
// Block 0x29, offset 0xa40
0xa66: 0x0001,
// Block 0x2a, offset 0xa80
0xaab: 0x0001,
// Block 0x2b, offset 0xac0
0xaea: 0x0001,
// Block 0x2c, offset 0xb00
0xb2d: 0x0001,
// Block 0x2d, offset 0xb40
0xb54: 0x0001,
// Block 0x2e, offset 0xb80
0xb90: 0x0001,
// Block 0x2f, offset 0xbc0
0xbe5: 0x0001,
// Block 0x30, offset 0xc00
0xc28: 0x0001,
// Block 0x31, offset 0xc40
0xc7c: 0x0001,
// Block 0x32, offset 0xc80
0xcbf: 0x0001,
// Block 0x33, offset 0xcc0
0xcc7: 0x0001,
// Block 0x34, offset 0xd00
0xd34: 0x0001,
// Block 0x35, offset 0xd40
0xd61: 0x0001,
// Block 0x36, offset 0xd80
0xdb9: 0x0001,
// Block 0x37, offset 0xdc0
0xdda: 0x0001,
}
// randIndex: 89 blocks, 5696 entries, 5696 bytes
// Block 0 is the zero block.
var randIndex = [5696]uint8{
// Block 0x0, offset 0x0
// Block 0x1, offset 0x40
// Block 0x2, offset 0x80
// Block 0x3, offset 0xc0
0xe1: 0x02, 0xe3: 0x03, 0xe4: 0x04,
0xea: 0x05, 0xeb: 0x06, 0xec: 0x07,
0xf0: 0x10, 0xf1: 0x24, 0xf2: 0x3d, 0xf3: 0x4f, 0xf4: 0x56,
// Block 0x4, offset 0x100
0x107: 0x01,
// Block 0x5, offset 0x140
0x16c: 0x02,
// Block 0x6, offset 0x180
0x19c: 0x03,
0x1ae: 0x04,
// Block 0x7, offset 0x1c0
0x1d8: 0x05,
0x1f7: 0x06,
// Block 0x8, offset 0x200
0x20c: 0x07,
// Block 0x9, offset 0x240
0x24a: 0x08,
// Block 0xa, offset 0x280
0x2b6: 0x09,
// Block 0xb, offset 0x2c0
0x2d5: 0x0a,
// Block 0xc, offset 0x300
0x31a: 0x0b,
// Block 0xd, offset 0x340
0x373: 0x0c,
// Block 0xe, offset 0x380
0x38b: 0x0d,
// Block 0xf, offset 0x3c0
0x3f0: 0x0e,
// Block 0x10, offset 0x400
0x433: 0x0f,
// Block 0x11, offset 0x440
0x45d: 0x10,
// Block 0x12, offset 0x480
0x491: 0x08, 0x494: 0x09, 0x497: 0x0a,
0x49b: 0x0b, 0x49c: 0x0c,
0x4a1: 0x0d,
0x4ad: 0x0e,
0x4ba: 0x0f,
// Block 0x13, offset 0x4c0
0x4c1: 0x11,
// Block 0x14, offset 0x500
0x531: 0x12,
// Block 0x15, offset 0x540
0x546: 0x13,
// Block 0x16, offset 0x580
0x5ab: 0x14,
// Block 0x17, offset 0x5c0
0x5d4: 0x11,
0x5fe: 0x11,
// Block 0x18, offset 0x600
0x618: 0x0a,
// Block 0x19, offset 0x640
0x65b: 0x15,
// Block 0x1a, offset 0x680
0x6a0: 0x16,
// Block 0x1b, offset 0x6c0
0x6d2: 0x17,
0x6f6: 0x18,
// Block 0x1c, offset 0x700
0x711: 0x19,
// Block 0x1d, offset 0x740
0x768: 0x1a,
// Block 0x1e, offset 0x780
0x783: 0x1b,
// Block 0x1f, offset 0x7c0
0x7f9: 0x1c,
// Block 0x20, offset 0x800
0x831: 0x1d,
// Block 0x21, offset 0x840
0x85e: 0x1e,
// Block 0x22, offset 0x880
0x898: 0x1f,
// Block 0x23, offset 0x8c0
0x8c7: 0x18,
0x8d5: 0x14,
0x8f7: 0x20,
0x8fe: 0x1f,
// Block 0x24, offset 0x900
0x905: 0x21,
// Block 0x25, offset 0x940
0x966: 0x03,
// Block 0x26, offset 0x980
0x981: 0x07, 0x983: 0x11,
0x989: 0x12, 0x98a: 0x13, 0x98e: 0x14, 0x98f: 0x15,
0x992: 0x16, 0x995: 0x17, 0x996: 0x18,
0x998: 0x19, 0x999: 0x1a, 0x99b: 0x1b, 0x99f: 0x1c,
0x9a3: 0x1d,
0x9ad: 0x1e, 0x9af: 0x1f,
0x9b0: 0x20, 0x9b1: 0x21,
0x9b8: 0x22, 0x9bd: 0x23,
// Block 0x27, offset 0x9c0
0x9cd: 0x22,
// Block 0x28, offset 0xa00
0xa0c: 0x08,
// Block 0x29, offset 0xa40
0xa6f: 0x1c,
// Block 0x2a, offset 0xa80
0xa90: 0x1a,
0xaaf: 0x23,
// Block 0x2b, offset 0xac0
0xae3: 0x19,
0xae8: 0x24,
0xafc: 0x25,
// Block 0x2c, offset 0xb00
0xb13: 0x26,
// Block 0x2d, offset 0xb40
0xb67: 0x1c,
// Block 0x2e, offset 0xb80
0xb8f: 0x0b,
// Block 0x2f, offset 0xbc0
0xbcb: 0x27,
0xbe7: 0x26,
// Block 0x30, offset 0xc00
0xc34: 0x16,
// Block 0x31, offset 0xc40
0xc62: 0x03,
// Block 0x32, offset 0xc80
0xcbb: 0x12,
// Block 0x33, offset 0xcc0
0xcdf: 0x09,
// Block 0x34, offset 0xd00
0xd34: 0x0a,
// Block 0x35, offset 0xd40
0xd41: 0x1e,
// Block 0x36, offset 0xd80
0xd83: 0x28,
// Block 0x37, offset 0xdc0
0xdc0: 0x15,
// Block 0x38, offset 0xe00
0xe1a: 0x15,
// Block 0x39, offset 0xe40
0xe65: 0x29,
// Block 0x3a, offset 0xe80
0xe86: 0x1f,
// Block 0x3b, offset 0xec0
0xeec: 0x18,
// Block 0x3c, offset 0xf00
0xf28: 0x2a,
// Block 0x3d, offset 0xf40
0xf53: 0x08,
// Block 0x3e, offset 0xf80
0xfa2: 0x2b,
0xfaa: 0x17,
// Block 0x3f, offset 0xfc0
0xfc0: 0x25, 0xfc2: 0x26,
0xfc9: 0x27, 0xfcd: 0x28, 0xfce: 0x29,
0xfd5: 0x2a,
0xfd8: 0x2b, 0xfd9: 0x2c, 0xfdf: 0x2d,
0xfe1: 0x2e, 0xfe2: 0x2f, 0xfe3: 0x30, 0xfe6: 0x31,
0xfe9: 0x32, 0xfec: 0x33, 0xfed: 0x34, 0xfef: 0x35,
0xff1: 0x36, 0xff2: 0x37, 0xff3: 0x38, 0xff4: 0x39,
0xffa: 0x3a, 0xffc: 0x3b, 0xffe: 0x3c,
// Block 0x40, offset 0x1000
0x102c: 0x2c,
// Block 0x41, offset 0x1040
0x1074: 0x2c,
// Block 0x42, offset 0x1080
0x108c: 0x08,
0x10a0: 0x2d,
// Block 0x43, offset 0x10c0
0x10e8: 0x10,
// Block 0x44, offset 0x1100
0x110f: 0x13,
// Block 0x45, offset 0x1140
0x114b: 0x2e,
// Block 0x46, offset 0x1180
0x118b: 0x23,
0x119d: 0x0c,
// Block 0x47, offset 0x11c0
0x11c3: 0x12,
0x11f9: 0x0f,
// Block 0x48, offset 0x1200
0x121e: 0x1b,
// Block 0x49, offset 0x1240
0x1270: 0x2f,
// Block 0x4a, offset 0x1280
0x128a: 0x1b,
0x12a7: 0x02,
// Block 0x4b, offset 0x12c0
0x12fb: 0x14,
// Block 0x4c, offset 0x1300
0x1333: 0x30,
// Block 0x4d, offset 0x1340
0x134d: 0x31,
// Block 0x4e, offset 0x1380
0x138e: 0x15,
// Block 0x4f, offset 0x13c0
0x13f4: 0x32,
// Block 0x50, offset 0x1400
0x141b: 0x33,
// Block 0x51, offset 0x1440
0x1448: 0x3e, 0x1449: 0x3f, 0x144a: 0x40, 0x144f: 0x41,
0x1459: 0x42, 0x145c: 0x43, 0x145e: 0x44, 0x145f: 0x45,
0x1468: 0x46, 0x1469: 0x47, 0x146c: 0x48, 0x146d: 0x49, 0x146e: 0x4a,
0x1472: 0x4b, 0x1473: 0x4c,
0x1479: 0x4d, 0x147b: 0x4e,
// Block 0x52, offset 0x1480
0x1480: 0x34,
0x1499: 0x11,
0x14b6: 0x2c,
// Block 0x53, offset 0x14c0
0x14e4: 0x0d,
// Block 0x54, offset 0x1500
0x1527: 0x08,
// Block 0x55, offset 0x1540
0x1555: 0x2b,
// Block 0x56, offset 0x1580
0x15b2: 0x35,
// Block 0x57, offset 0x15c0
0x15f2: 0x1c, 0x15f4: 0x29,
// Block 0x58, offset 0x1600
0x1600: 0x50, 0x1603: 0x51,
0x1608: 0x52, 0x160a: 0x53, 0x160d: 0x54, 0x160e: 0x55,
}
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
//
// NOTE(review): this file is generated ("go test -tags=generate"); change the
// generator rather than editing this code by hand.
func (t *multiTrie) lookup(s []byte) (v uint64, sz int) {
	c0 := s[0]
	switch {
	case c0 < 0x80: // is ASCII
		return t.ascii[c0], 1
	case c0 < 0xC2:
		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
	case c0 < 0xE0: // 2-byte UTF-8
		if len(s) < 2 {
			return 0, 0
		}
		i := t.utf8Start[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c1), 2
	case c0 < 0xF0: // 3-byte UTF-8
		if len(s) < 3 {
			return 0, 0
		}
		i := t.utf8Start[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		o := uint32(i)<<6 + uint32(c1) // blocks are 64 entries wide
		i = multiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return 0, 2 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c2), 3
	case c0 < 0xF8: // 4-byte UTF-8
		if len(s) < 4 {
			return 0, 0
		}
		i := t.utf8Start[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		o := uint32(i)<<6 + uint32(c1)
		i = multiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return 0, 2 // Illegal UTF-8: not a continuation byte.
		}
		o = uint32(i)<<6 + uint32(c2)
		i = multiIndex[o]
		c3 := s[3]
		if c3 < 0x80 || 0xC0 <= c3 {
			return 0, 3 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c3), 4
	}
	// Illegal rune
	return 0, 1
}
// lookupUnsafe returns the trie value for the first UTF-8 encoding in s.
// s must start with a full and valid UTF-8 encoded rune.
//
// Unlike lookup, no continuation-byte validation or length checking is
// performed; callers must guarantee the input is well-formed.
func (t *multiTrie) lookupUnsafe(s []byte) uint64 {
	c0 := s[0]
	if c0 < 0x80 { // is ASCII
		return t.ascii[c0]
	}
	i := t.utf8Start[c0]
	if c0 < 0xE0 { // 2-byte UTF-8
		return t.lookupValue(uint32(i), s[1])
	}
	i = multiIndex[uint32(i)<<6+uint32(s[1])]
	if c0 < 0xF0 { // 3-byte UTF-8
		return t.lookupValue(uint32(i), s[2])
	}
	i = multiIndex[uint32(i)<<6+uint32(s[2])]
	if c0 < 0xF8 { // 4-byte UTF-8
		return t.lookupValue(uint32(i), s[3])
	}
	return 0
}
// lookupString returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
//
// This is the string counterpart of lookup; the bodies are identical apart
// from the input type (both are generated from the same template).
func (t *multiTrie) lookupString(s string) (v uint64, sz int) {
	c0 := s[0]
	switch {
	case c0 < 0x80: // is ASCII
		return t.ascii[c0], 1
	case c0 < 0xC2:
		return 0, 1 // Illegal UTF-8: not a starter, not ASCII.
	case c0 < 0xE0: // 2-byte UTF-8
		if len(s) < 2 {
			return 0, 0
		}
		i := t.utf8Start[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c1), 2
	case c0 < 0xF0: // 3-byte UTF-8
		if len(s) < 3 {
			return 0, 0
		}
		i := t.utf8Start[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		o := uint32(i)<<6 + uint32(c1) // blocks are 64 entries wide
		i = multiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return 0, 2 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c2), 3
	case c0 < 0xF8: // 4-byte UTF-8
		if len(s) < 4 {
			return 0, 0
		}
		i := t.utf8Start[c0]
		c1 := s[1]
		if c1 < 0x80 || 0xC0 <= c1 {
			return 0, 1 // Illegal UTF-8: not a continuation byte.
		}
		o := uint32(i)<<6 + uint32(c1)
		i = multiIndex[o]
		c2 := s[2]
		if c2 < 0x80 || 0xC0 <= c2 {
			return 0, 2 // Illegal UTF-8: not a continuation byte.
		}
		o = uint32(i)<<6 + uint32(c2)
		i = multiIndex[o]
		c3 := s[3]
		if c3 < 0x80 || 0xC0 <= c3 {
			return 0, 3 // Illegal UTF-8: not a continuation byte.
		}
		return t.lookupValue(uint32(i), c3), 4
	}
	// Illegal rune
	return 0, 1
}
// lookupStringUnsafe returns the trie value for the first UTF-8 encoding in s.
// s must start with a full and valid UTF-8 encoded rune.
//
// String counterpart of lookupUnsafe: no validation or bounds checks beyond
// indexing, so the input must be well-formed UTF-8.
func (t *multiTrie) lookupStringUnsafe(s string) uint64 {
	c0 := s[0]
	if c0 < 0x80 { // is ASCII
		return t.ascii[c0]
	}
	i := t.utf8Start[c0]
	if c0 < 0xE0 { // 2-byte UTF-8
		return t.lookupValue(uint32(i), s[1])
	}
	i = multiIndex[uint32(i)<<6+uint32(s[1])]
	if c0 < 0xF0 { // 3-byte UTF-8
		return t.lookupValue(uint32(i), s[2])
	}
	i = multiIndex[uint32(i)<<6+uint32(s[2])]
	if c0 < 0xF8 { // 4-byte UTF-8
		return t.lookupValue(uint32(i), s[3])
	}
	return 0
}
// multiTrie. Total size: 18250 bytes (17.82 KiB). Checksum: a69a609d8696aa5e.
type multiTrie struct {
	ascii     []uint64 // index for ASCII bytes
	utf8Start []uint8  // index for UTF-8 bytes >= 0xC0
}

// newMultiTrie returns trie i of the generated tries. Each handle selects a
// 64-entry-aligned starting block within the shared multiValues/multiIndex
// arrays, which is how the tries share common blocks.
func newMultiTrie(i int) *multiTrie {
	h := multiTrieHandles[i]
	return &multiTrie{multiValues[uint32(h.ascii)<<6:], multiIndex[uint32(h.multi)<<6:]}
}

// multiTrieHandle holds the block offsets (in units of 64 entries) of one
// trie's ASCII value block and multi-byte index block.
type multiTrieHandle struct {
	ascii, multi uint8
}

// multiTrieHandles: 5 handles, 10 bytes
var multiTrieHandles = [5]multiTrieHandle{
	{0, 0},   // 8c1e77823143d35c: all
	{0, 23},  // 8fb58ff8243b45b0: ASCII only
	{0, 23},  // 8fb58ff8243b45b0: ASCII only 2
	{0, 24},  // 2ccc43994f11046f: BMP only
	{30, 25}, // ce448591bdcb4733: No BMP
}

// lookupValue determines the type of block n and looks up the value for b.
func (t *multiTrie) lookupValue(n uint32, b byte) uint64 {
	switch {
	default:
		return uint64(multiValues[n<<6+uint32(b)])
	}
}
// multiValues: 32 blocks, 2048 entries, 16384 bytes
// The third block is the zero block.
//
// NOTE(review): sparse generated table — entries not listed below default to
// zero per Go composite-literal semantics.
var multiValues = [2048]uint64{
	// Block 0x0, offset 0x0
	0x03: 0x6e361699800b9fb8, 0x04: 0x52d3935a34f6f0b, 0x05: 0x2948319393e7ef10,
	0x07: 0x20f03b006704f663, 0x08: 0x6c15c0732bb2495f, 0x09: 0xe54e2c59d953551,
	0x0f: 0x33d8a825807d8037, 0x10: 0x6ecd93cb12168b92, 0x11: 0x6a81c9c0ce86e884,
	0x1f: 0xa03e77aac8be79b, 0x20: 0x28591d0e7e486efa, 0x21: 0x716fa3bc398dec8,
	0x3f: 0x4fd3bcfa72bce8b0,
	// Block 0x1, offset 0x40
	0x40: 0x3cbaef3db8ba5f12, 0x41: 0x2d262347c1f56357,
	0x7f: 0x782caa2d25a418a9,
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc0: 0x6bbd1f937b1ff5d2, 0xc1: 0x732e23088d2eb8a4,
	// Block 0x4, offset 0x100
	0x13f: 0x56f8c4c82f5962dc,
	// Block 0x5, offset 0x140
	0x140: 0x57dc4544729a5da2, 0x141: 0x2f62f9cd307ffa0d,
	// Block 0x6, offset 0x180
	0x1bf: 0x7bf4d0ebf302a088,
	// Block 0x7, offset 0x1c0
	0x1c0: 0x1f0d67f249e59931, 0x1c1: 0x3011def73aa550c7,
	// Block 0x8, offset 0x200
	0x23f: 0x5de81c1dff6bf29d,
	// Block 0x9, offset 0x240
	0x240: 0x752c035737b825e8, 0x241: 0x1e793399081e3bb3,
	// Block 0xa, offset 0x280
	0x2bf: 0x6a28f01979cbf059,
	// Block 0xb, offset 0x2c0
	0x2c0: 0x373a4b0f2cbd4c74, 0x2c1: 0x4fd2c288683b767c,
	// Block 0xc, offset 0x300
	0x33f: 0x5a10ffa9e29184fb,
	// Block 0xd, offset 0x340
	0x340: 0x700f9bdb53fff6a5, 0x341: 0xcde93df0427eb79,
	// Block 0xe, offset 0x380
	0x3bf: 0x74071288fff39c76,
	// Block 0xf, offset 0x3c0
	0x3c0: 0x481fc2f510e5268a, 0x3c1: 0x7565c28164204849,
	// Block 0x10, offset 0x400
	0x43f: 0x5676a62fd49c6bec,
	// Block 0x11, offset 0x440
	0x440: 0x2f2d15776cbafc6b, 0x441: 0x4c55e8dc0ff11a3f,
	// Block 0x12, offset 0x480
	0x4bf: 0x69d6f0fe711fafc9,
	// Block 0x13, offset 0x4c0
	0x4c0: 0x33181de28cfb062d, 0x4c1: 0x2ef3adc6bb2f2d02,
	// Block 0x14, offset 0x500
	0x53f: 0xe03b31814c95f8b,
	// Block 0x15, offset 0x540
	0x540: 0x3bf6dc9a1c115603, 0x541: 0x6984ec9b7f51f7fc,
	// Block 0x16, offset 0x580
	0x5bf: 0x3c02ea92fb168559,
	// Block 0x17, offset 0x5c0
	0x5c0: 0x1badfe42e7629494, 0x5c1: 0x6dc4a554005f7645,
	// Block 0x18, offset 0x600
	0x63f: 0x3bb2ed2a72748f4b,
	// Block 0x19, offset 0x640
	0x640: 0x291354cd6767ec10, 0x641: 0x2c3a4715e3c070d6,
	// Block 0x1a, offset 0x680
	0x6bf: 0x352711cfb7236418,
	// Block 0x1b, offset 0x6c0
	0x6c0: 0x3a59d34fb8bceda, 0x6c1: 0x5e90d8ebedd64fa1,
	// Block 0x1c, offset 0x700
	0x73f: 0x7191a77b28d23110,
	// Block 0x1d, offset 0x740
	0x740: 0x4ca7f0c1623423d8, 0x741: 0x4f7156d996e2d0de,
	// Block 0x1e, offset 0x780
	// Block 0x1f, offset 0x7c0
}
// multiIndex: 29 blocks, 1856 entries, 1856 bytes
// Block 0 is the zero block.
//
// NOTE(review): sparse generated table mapping starter/continuation bytes to
// next-level block numbers; unlisted entries are zero (the zero block).
var multiIndex = [1856]uint8{
	// Block 0x0, offset 0x0
	// Block 0x1, offset 0x40
	// Block 0x2, offset 0x80
	// Block 0x3, offset 0xc0
	0xc2: 0x01, 0xc3: 0x02, 0xc4: 0x03, 0xc7: 0x04,
	0xc8: 0x05, 0xcf: 0x06,
	0xd0: 0x07,
	0xdf: 0x08,
	0xe0: 0x02, 0xe1: 0x03, 0xe2: 0x04, 0xe3: 0x05, 0xe4: 0x06, 0xe7: 0x07,
	0xe8: 0x08, 0xef: 0x09,
	0xf0: 0x0e, 0xf1: 0x11, 0xf2: 0x13, 0xf3: 0x15, 0xf4: 0x17,
	// Block 0x4, offset 0x100
	0x120: 0x09,
	0x13f: 0x0a,
	// Block 0x5, offset 0x140
	0x140: 0x0b,
	0x17f: 0x0c,
	// Block 0x6, offset 0x180
	0x180: 0x0d,
	// Block 0x7, offset 0x1c0
	0x1ff: 0x0e,
	// Block 0x8, offset 0x200
	0x200: 0x0f,
	// Block 0x9, offset 0x240
	0x27f: 0x10,
	// Block 0xa, offset 0x280
	0x280: 0x11,
	// Block 0xb, offset 0x2c0
	0x2ff: 0x12,
	// Block 0xc, offset 0x300
	0x300: 0x13,
	// Block 0xd, offset 0x340
	0x37f: 0x14,
	// Block 0xe, offset 0x380
	0x380: 0x15,
	// Block 0xf, offset 0x3c0
	0x3ff: 0x16,
	// Block 0x10, offset 0x400
	0x410: 0x0a,
	0x41f: 0x0b,
	0x420: 0x0c,
	0x43f: 0x0d,
	// Block 0x11, offset 0x440
	0x440: 0x17,
	// Block 0x12, offset 0x480
	0x4bf: 0x18,
	// Block 0x13, offset 0x4c0
	0x4c0: 0x0f,
	0x4ff: 0x10,
	// Block 0x14, offset 0x500
	0x500: 0x19,
	// Block 0x15, offset 0x540
	0x540: 0x12,
	// Block 0x16, offset 0x580
	0x5bf: 0x1a,
	// Block 0x17, offset 0x5c0
	0x5ff: 0x14,
	// Block 0x18, offset 0x600
	0x600: 0x1b,
	// Block 0x19, offset 0x640
	0x640: 0x16,
	// Block 0x1a, offset 0x680
	// Block 0x1b, offset 0x6c0
	0x6c2: 0x01, 0x6c3: 0x02, 0x6c4: 0x03, 0x6c7: 0x04,
	0x6c8: 0x05, 0x6cf: 0x06,
	0x6d0: 0x07,
	0x6df: 0x08,
	0x6e0: 0x02, 0x6e1: 0x03, 0x6e2: 0x04, 0x6e3: 0x05, 0x6e4: 0x06, 0x6e7: 0x07,
	0x6e8: 0x08, 0x6ef: 0x09,
	// Block 0x1c, offset 0x700
	0x730: 0x0e, 0x731: 0x11, 0x732: 0x13, 0x733: 0x15, 0x734: 0x17,
}

View file

@ -1,71 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen_test
import (
"fmt"
"io"
"io/ioutil"
"golang.org/x/text/internal/triegen"
)
// ExampleCompacter compares the generated size of a trie populated at
// 64-rune strides with and without the custom myCompacter.
func ExampleCompacter() {
	t := triegen.NewTrie("root")
	for r := rune(0); r < 10000; r += 64 {
		t.Insert(r, 0x9015BADA55^uint64(r))
	}
	sz, _ := t.Gen(ioutil.Discard)
	fmt.Printf("Size normal: %5d\n", sz)

	var c myCompacter
	sz, _ = t.Gen(ioutil.Discard, triegen.Compact(&c))
	fmt.Printf("Size compacted: %5d\n", sz)

	// Output:
	// Size normal: 81344
	// Size compacted: 3224
}
// A myCompacter is a compacter that accepts a block only when every value
// after the first is zero, in which case it stores just that first value.
type myCompacter []uint64

// Size reports whether a block is compactable and, if so, its encoded size.
func (c *myCompacter) Size(values []uint64) (sz int, ok bool) {
	rest := values[1:]
	for _, val := range rest {
		if val != 0 {
			return 0, false
		}
	}
	return 8, true // the size of a uint64
}

// Store records the block's first value and returns its handle.
func (c *myCompacter) Store(v []uint64) uint32 {
	handle := uint32(len(*c))
	*c = append(*c, v[0])
	return handle
}

// Print writes the Go source for the compacted data to w.
func (c *myCompacter) Print(w io.Writer) error {
	fmt.Fprintln(w, "var firstValue = []uint64{")
	for _, first := range *c {
		fmt.Fprintf(w, "\t%#x,\n", first)
	}
	fmt.Fprintln(w, "}")
	return nil
}

// Handler names the lookup function that decodes compacted blocks.
func (c *myCompacter) Handler() string {
	return "getFirstValue"

	// Where getFirstValue is included along with the generated code:
	// func getFirstValue(n uint32, b byte) uint64 {
	//     if b == 0x80 { // the first continuation byte
	//         return firstValue[n]
	//     }
	//     return 0
	// }
}

View file

@ -1,148 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package triegen_test
import (
"fmt"
"io/ioutil"
"math/rand"
"unicode"
"golang.org/x/text/internal/triegen"
)
// seed makes the pseudo-random sequences in these examples reproducible.
const seed = 0x12345

// genWriter receives the generated trie source. The examples discard it; the
// generate step (init in the generate-tagged file) redirects it to a file.
var genWriter = ioutil.Discard

// randomRunes returns 100 distinct valid runes, each mapped to the value 1,
// drawn from a deterministic seeded random sequence.
func randomRunes() map[rune]uint8 {
	src := rand.New(rand.NewSource(seed))
	picked := make(map[rune]uint8)
	for len(picked) < 100 {
		r := rune(src.Int31n(unicode.MaxRune + 1))
		// A round trip through string leaves r unchanged only when it is a
		// valid, encodable code point.
		if []rune(string(r))[0] == r {
			picked[r] = 1
		}
	}
	return picked
}
// Example_build shows how to build a simple trie. It assigns the value 1 to
// 100 random runes generated by randomRunes.
func Example_build() {
	t := triegen.NewTrie("rand")

	// The values are ignored, so the simplified single-variable range
	// clause (gofmt -s) is used instead of "for r, _ := range".
	for r := range randomRunes() {
		t.Insert(r, 1)
	}
	sz, err := t.Gen(genWriter)

	fmt.Printf("Trie size: %d bytes\n", sz)
	fmt.Printf("Error: %v\n", err)

	// Output:
	// Trie size: 9280 bytes
	// Error: <nil>
}
// Example_lookup demonstrates how to use the trie generated by Example_build.
func Example_lookup() {
	trie := newRandTrie(0) // trie 0 from the generated data_test.go

	// The same set of runes used by Example_build.
	runes := randomRunes()

	// Verify the right value is returned for all runes.
	for r := rune(0); r <= unicode.MaxRune; r++ {
		// Note that the return type of lookup is uint8.
		if v, _ := trie.lookupString(string(r)); v != runes[r] {
			fmt.Println("FAILURE")
			return
		}
	}
	fmt.Println("SUCCESS")

	// Output:
	// SUCCESS
}
// runeValues returns deterministic pseudo-random values for a set of
// interesting runes: the neighbors p-1, p, p+1 of every power of two p up to
// unicode.MaxRune.
func runeValues() map[rune]uint64 {
	src := rand.New(rand.NewSource(seed))
	vals := make(map[rune]uint64)
	for p := 4; p <= unicode.MaxRune; p <<= 1 {
		for _, r := range []rune{rune(p - 1), rune(p), rune(p + 1)} {
			vals[r] = uint64(src.Int63())
		}
	}
	return vals
}
// ExampleGen_build demonstrates the creation of multiple tries sharing common
// blocks. ExampleGen_lookup demonstrates how to use the generated tries.
func ExampleGen_build() {
	var tries []*triegen.Trie

	rv := runeValues()
	// Build one trie per subset of runeValues; the subsets mirror the
	// multiTrieHandles entries of the generated data.
	for _, c := range []struct {
		include func(rune) bool
		name    string
	}{
		{func(r rune) bool { return true }, "all"},
		{func(r rune) bool { return r < 0x80 }, "ASCII only"},
		{func(r rune) bool { return r < 0x80 }, "ASCII only 2"},
		{func(r rune) bool { return r <= 0xFFFF }, "BMP only"},
		{func(r rune) bool { return r > 0xFFFF }, "No BMP"},
	} {
		t := triegen.NewTrie(c.name)
		tries = append(tries, t)

		for r, v := range rv {
			if c.include(r) {
				t.Insert(r, v)
			}
		}
	}
	sz, err := triegen.Gen(genWriter, "multi", tries)

	fmt.Printf("Trie size: %d bytes\n", sz)
	fmt.Printf("Error: %v\n", err)

	// Output:
	// Trie size: 18250 bytes
	// Error: <nil>
}
// ExampleGen_lookup shows how to look up values in the trie generated by
// ExampleGen_build.
func ExampleGen_lookup() {
	rv := runeValues()
	for i, include := range []func(rune) bool{
		func(r rune) bool { return true },        // all
		func(r rune) bool { return r < 0x80 },    // ASCII only
		func(r rune) bool { return r < 0x80 },    // ASCII only 2
		func(r rune) bool { return r <= 0xFFFF }, // BMP only
		func(r rune) bool { return r > 0xFFFF },  // No BMP
	} {
		t := newMultiTrie(i)

		for r := rune(0); r <= unicode.MaxRune; r++ {
			// x is the expected value: rv[r] when r is in this trie's
			// subset, 0 otherwise.
			x := uint64(0)
			if include(r) {
				x = rv[r]
			}

			// As we convert from a valid rune, we know it is safe to use
			// lookupStringUnsafe.
			if v := t.lookupStringUnsafe(string(r)); x != v {
				fmt.Println("FAILURE")
				return
			}
		}
	}
	fmt.Println("SUCCESS")

	// Output:
	// SUCCESS
}

View file

@ -1,68 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build generate
package triegen_test
// The code in this file generates captures and writes the tries generated in
// the examples to data_test.go. To invoke it, run:
// go test -tags=generate
//
// Making the generation code a "test" allows us to link in the necessary test
// code.
import (
"log"
"os"
"os/exec"
)
// init regenerates data_test.go: it redirects genWriter to a temporary file,
// runs the example builders to capture their generated tries, gofmt-s the
// result, renames it into place, and then exits the process. This file is
// only compiled under the "generate" build tag.
func init() {
	const tmpfile = "tmpout"
	const dstfile = "data_test.go"

	f, err := os.Create(tmpfile)
	if err != nil {
		log.Fatalf("Could not create output file: %v", err)
	}
	defer os.Remove(tmpfile)
	defer f.Close()

	// We exit before this function returns, regardless of success or failure,
	// so there's no need to save (and later restore) the existing genWriter
	// value.
	genWriter = f

	f.Write([]byte(header))

	Example_build()
	ExampleGen_build()

	if err := exec.Command("gofmt", "-w", tmpfile).Run(); err != nil {
		log.Fatal(err)
	}
	os.Remove(dstfile)
	os.Rename(tmpfile, dstfile)

	os.Exit(0)
}
// header is the preamble written at the top of the regenerated data_test.go
// before the captured trie output.
const header = `// This file is generated with "go test -tags generate". DO NOT EDIT!
// +build !generate
package triegen_test
`
// Stubs for generated tries. Because data_test.go is excluded when the
// generate flag is set, these placeholders keep the package compiling. The
// tests will clearly fail with them, but that is fine: they only exist to
// bootstrap the generation step.
type trie struct{}

func (t *trie) lookupString(string) (uint8, int) { return 0, 1 }
func (t *trie) lookupStringUnsafe(string) uint64 { return 0 }

func newRandTrie(i int) *trie {
	return new(trie)
}

func newMultiTrie(i int) *trie {
	return new(trie)
}

View file

@ -1,81 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ucd_test
import (
"fmt"
"strings"
"golang.org/x/text/internal/ucd"
)
// Example demonstrates the two parsing modes of the ucd package: per-rune
// records from a UnicodeData excerpt, and raw ranges (KeepRanges) from a
// Scripts excerpt.
func Example() {
	// Read rune-by-rune from UnicodeData.
	var count int
	p := ucd.New(strings.NewReader(unicodeData))
	for p.Next() {
		count++
		if lower := p.Runes(ucd.SimpleLowercaseMapping); lower != nil {
			fmt.Printf("lower(%U) -> %U\n", p.Rune(0), lower[0])
		}
	}
	if err := p.Err(); err != nil {
		fmt.Println(err)
	}
	fmt.Println("Number of runes visited:", count)

	// Read raw ranges from Scripts.
	p = ucd.New(strings.NewReader(scripts), ucd.KeepRanges)
	for p.Next() {
		start, end := p.Range(0)
		fmt.Printf("%04X..%04X: %s\n", start, end, p.String(1))
	}
	if err := p.Err(); err != nil {
		fmt.Println(err)
	}

	// Output:
	// lower(U+00C0) -> U+00E0
	// lower(U+00C1) -> U+00E1
	// lower(U+00C2) -> U+00E2
	// lower(U+00C3) -> U+00E3
	// lower(U+00C4) -> U+00E4
	// Number of runes visited: 6594
	// 0000..001F: Common
	// 0020..0020: Common
	// 0021..0023: Common
	// 0024..0024: Common
}
// Excerpt from UnicodeData.txt: one semicolon-separated record per line. The
// final First/Last pair forms a legacy rune range (see the comment in the
// data itself), which explains the large rune count printed by Example.
const unicodeData = `
00B9;SUPERSCRIPT ONE;No;0;EN;<super> 0031;;1;1;N;SUPERSCRIPT DIGIT ONE;;;;
00BA;MASCULINE ORDINAL INDICATOR;Lo;0;L;<super> 006F;;;;N;;;;;
00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK;Pf;0;ON;;;;;Y;RIGHT POINTING GUILLEMET;;;;
00BC;VULGAR FRACTION ONE QUARTER;No;0;ON;<fraction> 0031 2044 0034;;;1/4;N;FRACTION ONE QUARTER;;;;
00BD;VULGAR FRACTION ONE HALF;No;0;ON;<fraction> 0031 2044 0032;;;1/2;N;FRACTION ONE HALF;;;;
00BE;VULGAR FRACTION THREE QUARTERS;No;0;ON;<fraction> 0033 2044 0034;;;3/4;N;FRACTION THREE QUARTERS;;;;
00BF;INVERTED QUESTION MARK;Po;0;ON;;;;;N;;;;;
00C0;LATIN CAPITAL LETTER A WITH GRAVE;Lu;0;L;0041 0300;;;;N;LATIN CAPITAL LETTER A GRAVE;;;00E0;
00C1;LATIN CAPITAL LETTER A WITH ACUTE;Lu;0;L;0041 0301;;;;N;LATIN CAPITAL LETTER A ACUTE;;;00E1;
00C2;LATIN CAPITAL LETTER A WITH CIRCUMFLEX;Lu;0;L;0041 0302;;;;N;LATIN CAPITAL LETTER A CIRCUMFLEX;;;00E2;
00C3;LATIN CAPITAL LETTER A WITH TILDE;Lu;0;L;0041 0303;;;;N;LATIN CAPITAL LETTER A TILDE;;;00E3;
00C4;LATIN CAPITAL LETTER A WITH DIAERESIS;Lu;0;L;0041 0308;;;;N;LATIN CAPITAL LETTER A DIAERESIS;;;00E4;
# A legacy rune range.
3400;<CJK Ideograph Extension A, First>;Lo;0;L;;;;;N;;;;;
4DB5;<CJK Ideograph Extension A, Last>;Lo;0;L;;;;;N;;;;;
`

// Excerpt from Scripts.txt: ranges with a script name, parsed with KeepRanges
// in Example above.
const scripts = `
# Property:	Script
# ================================================

0000..001F    ; Common # Cc  [32] <control-0000>..<control-001F>
0020          ; Common # Zs       SPACE
0021..0023    ; Common # Po   [3] EXCLAMATION MARK..NUMBER SIGN
0024          ; Common # Sc       DOLLAR SIGN
`

View file

@ -1,105 +0,0 @@
package ucd
import (
"strings"
"testing"
)
// file is a synthetic UCD-style fixture exercising comments, @Part lines,
// rune ranges, and one column for each getter type (bool, uint, int, float,
// runes).
const file = `
# Comments should be skipped
# rune; bool; uint; int; float; runes; # Y
0..0005; Y; 0; 2; -5.25 ; 0 1 2 3 4 5;
6..0007; Yes ; 6; 1; -4.25 ; 0006 0007;
8; T ; 8 ; 0 ;-3.25 ;;# T
9; True ;9 ; -1;-2.25 ; 0009;

# more comments to be ignored
@Part0

A; N; 10 ; -2; -1.25; ;# N
B; No; 11 ; -3; -0.25;
C; False;12; -4; 0.75;
D; ;13;-5;1.75;

@Part1 # Another part.
# We test part comments get removed by not commenting the the next line.
E..10FFFF; F; 14 ; -6; 2.75;
`

// want lists the expected start/end rune of each parsed record, in input
// order; single-rune records have start == end.
var want = []struct {
	start, end rune
}{
	{0x00, 0x05},
	{0x06, 0x07},
	{0x08, 0x08},
	{0x09, 0x09},
	{0x0A, 0x0A},
	{0x0B, 0x0B},
	{0x0C, 0x0C},
	{0x0D, 0x0D},
	{0x0E, 0x10FFFF},
}
// TestGetters parses the synthetic file fixture and checks every Parser
// getter (Range, Rune, Bool, Uint, Int, Float, Runes, Comment) as well as the
// Part handler invocations.
func TestGetters(t *testing.T) {
	parts := [][2]string{
		{"Part0", ""},
		{"Part1", "Another part."},
	}
	// handler is invoked once per @Part line; it consumes the next expected
	// name/comment pair from parts.
	handler := func(p *Parser) {
		if len(parts) == 0 {
			t.Error("Part handler invoked too many times.")
			return
		}
		want := parts[0]
		parts = parts[1:]
		// Fixed: the original format string had a stray trailing quote and
		// only one %q for the two expected values.
		if got0, got1 := p.String(0), p.Comment(); got0 != want[0] || got1 != want[1] {
			t.Errorf("part: got %q, %q; want %q, %q", got0, got1, want[0], want[1])
		}
	}
	p := New(strings.NewReader(file), KeepRanges, Part(handler))
	for i := 0; p.Next(); i++ {
		start, end := p.Range(0)
		w := want[i]
		if start != w.start || end != w.end {
			t.Fatalf("%d:Range(0); got %#x..%#x; want %#x..%#x", i, start, end, w.start, w.end)
		}
		if w.start == w.end && p.Rune(0) != w.start {
			t.Errorf("%d:Range(0).start: got %U; want %U", i, p.Rune(0), w.start)
		}
		if got, want := p.Bool(1), w.start <= 9; got != want {
			t.Errorf("%d:Bool(1): got %v; want %v", i, got, want)
		}
		// Field 4 holds a float, so parsing it as a rune must fail.
		// NOTE(review): the %q argument prints field 1, not the field 4 that
		// Rune(4) parsed — possibly intended p.String(4); confirm.
		if got := p.Rune(4); got != 0 || p.Err() == nil {
			t.Errorf("%d:Rune(%q): got no error; want error", i, p.String(1))
		}
		p.err = nil // clear the induced error before the remaining getters
		if got := p.Uint(2); rune(got) != start {
			t.Errorf("%d:Uint(2): got %v; want %v", i, got, start)
		}
		if got, want := p.Int(3), 2-i; got != want {
			t.Errorf("%d:Int(3): got %v; want %v", i, got, want)
		}
		// Fixed: this message previously said "Int(3)", copied from the
		// check above, although it reports on Float(4).
		if got, want := p.Float(4), -5.25+float64(i); got != want {
			t.Errorf("%d:Float(4): got %v; want %v", i, got, want)
		}
		if got := p.Runes(5); got == nil {
			if p.String(5) != "" {
				t.Errorf("%d:Runes(5): expected non-empty list", i)
			}
		} else {
			if got[0] != start || got[len(got)-1] != end {
				t.Errorf("%d:Runes(5): got %#x; want %#x..%#x", i, got, start, end)
			}
		}
		if got := p.Comment(); got != "" && got != p.String(1) {
			t.Errorf("%d:Comment(): got %v; want %v", i, got, p.String(1))
		}
	}
	if err := p.Err(); err != nil {
		t.Errorf("Parser error: %v", err)
	}
	if len(parts) != 0 {
		t.Errorf("expected %d more invocations of part handler", len(parts))
	}
}

View file

@ -1,154 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"fmt"
"reflect"
"testing"
)
// TestSupported checks each list returned by Supported: its length matches
// the recorded count, and all entries are distinct and printable.
func TestSupported(t *testing.T) {
	// To prove the results are correct for a type, we test that the number of
	// results is identical to the number of results on record, that all results
	// are distinct and that all results are valid.
	tests := map[string]int{
		"BaseLanguages": numLanguages,
		"Scripts":       numScripts,
		"Regions":       numRegions,
		"Tags":          0,
	}
	sup := reflect.ValueOf(Supported)
	for name, num := range tests {
		v := sup.MethodByName(name).Call(nil)[0]
		if n := v.Len(); n != num {
			t.Errorf("len(%s()) was %d; want %d", name, n, num)
		}
		dup := make(map[string]bool)
		for i := 0; i < v.Len(); i++ {
			x := v.Index(i).Interface()
			// An invalid value will either cause a crash or result in a
			// duplicate when passed to Sprint.
			s := fmt.Sprint(x)
			if dup[s] {
				t.Errorf("%s: duplicate entry %q", name, s)
			}
			dup[s] = true
		}
		if len(dup) != v.Len() {
			t.Errorf("%s: # unique entries was %d; want %d", name, len(dup), v.Len())
		}
	}
}
// TestNewCoverage verifies that NewCoverage merges explicit lists,
// tag-derived base languages, and list-returning funcs into a single
// Coverage value.
func TestNewCoverage(t *testing.T) {
	// Composite literals simplified per gofmt -s (element type elided).
	bases := []Base{{0}, {3}, {7}}
	scripts := []Script{{11}, {17}, {23}}
	regions := []Region{{101}, {103}, {107}}
	tags := []Tag{Make("pt"), Make("en"), Make("en-GB"), Make("en-US"), Make("pt-PT")}
	fbases := func() []Base { return bases }
	fscripts := func() []Script { return scripts }
	fregions := func() []Region { return regions }
	ftags := func() []Tag { return tags }

	tests := []struct {
		desc    string
		list    []interface{}
		bases   []Base
		scripts []Script
		regions []Region
		tags    []Tag
	}{
		{
			desc: "empty",
		},
		{
			desc:  "bases",
			list:  []interface{}{bases},
			bases: bases,
		},
		{
			desc:    "scripts",
			list:    []interface{}{scripts},
			scripts: scripts,
		},
		{
			desc:    "regions",
			list:    []interface{}{regions},
			regions: regions,
		},
		{
			desc:  "bases derives from tags",
			list:  []interface{}{tags},
			bases: []Base{{_en}, {_pt}},
			tags:  tags,
		},
		{
			desc:  "tags and bases",
			list:  []interface{}{tags, bases},
			bases: bases,
			tags:  tags,
		},
		{
			desc:    "fully specified",
			list:    []interface{}{tags, bases, scripts, regions},
			bases:   bases,
			scripts: scripts,
			regions: regions,
			tags:    tags,
		},
		{
			desc:  "bases func",
			list:  []interface{}{fbases},
			bases: bases,
		},
		{
			desc:    "scripts func",
			list:    []interface{}{fscripts},
			scripts: scripts,
		},
		{
			desc:    "regions func",
			list:    []interface{}{fregions},
			regions: regions,
		},
		{
			desc:  "tags func",
			list:  []interface{}{ftags},
			bases: []Base{{_en}, {_pt}},
			tags:  tags,
		},
		{
			desc:  "tags and bases",
			list:  []interface{}{ftags, fbases},
			bases: bases,
			tags:  tags,
		},
		{
			desc:    "fully specified",
			list:    []interface{}{ftags, fbases, fscripts, fregions},
			bases:   bases,
			scripts: scripts,
			regions: regions,
			tags:    tags,
		},
	}
	for i, tt := range tests {
		l := NewCoverage(tt.list...)
		if a := l.BaseLanguages(); !reflect.DeepEqual(a, tt.bases) {
			t.Errorf("%d:%s: BaseLanguages was %v; want %v", i, tt.desc, a, tt.bases)
		}
		if a := l.Scripts(); !reflect.DeepEqual(a, tt.scripts) {
			t.Errorf("%d:%s: Scripts was %v; want %v", i, tt.desc, a, tt.scripts)
		}
		if a := l.Regions(); !reflect.DeepEqual(a, tt.regions) {
			t.Errorf("%d:%s: Regions was %v; want %v", i, tt.desc, a, tt.regions)
		}
		if a := l.Tags(); !reflect.DeepEqual(a, tt.tags) {
			t.Errorf("%d:%s: Tags was %v; want %v", i, tt.desc, a, tt.tags)
		}
	}
}

View file

@ -1,413 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language_test
import (
"fmt"
"net/http"
"golang.org/x/text/language"
)
// ExampleCanonType shows how the Default, BCP47, Macro, and All
// canonicalization types map the same input tags differently.
func ExampleCanonType() {
	// p prints the result of Make for each canonicalization type.
	p := func(id string) {
		fmt.Printf("Default(%s) -> %s\n", id, language.Make(id))
		fmt.Printf("BCP47(%s) -> %s\n", id, language.BCP47.Make(id))
		fmt.Printf("Macro(%s) -> %s\n", id, language.Macro.Make(id))
		fmt.Printf("All(%s) -> %s\n", id, language.All.Make(id))
	}
	p("en-Latn")
	p("sh")
	p("zh-cmn")
	p("bjd")
	p("iw-Latn-fonipa-u-cu-usd")
	// Output:
	// Default(en-Latn) -> en-Latn
	// BCP47(en-Latn) -> en
	// Macro(en-Latn) -> en-Latn
	// All(en-Latn) -> en
	// Default(sh) -> sr-Latn
	// BCP47(sh) -> sh
	// Macro(sh) -> sh
	// All(sh) -> sr-Latn
	// Default(zh-cmn) -> cmn
	// BCP47(zh-cmn) -> cmn
	// Macro(zh-cmn) -> zh
	// All(zh-cmn) -> zh
	// Default(bjd) -> drl
	// BCP47(bjd) -> drl
	// Macro(bjd) -> bjd
	// All(bjd) -> drl
	// Default(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
	// BCP47(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
	// Macro(iw-Latn-fonipa-u-cu-usd) -> iw-Latn-fonipa-u-cu-usd
	// All(iw-Latn-fonipa-u-cu-usd) -> he-Latn-fonipa-u-cu-usd
}
// ExampleTag_Base shows that Base infers a base language for underspecified
// tags and reports a confidence level alongside it.
func ExampleTag_Base() {
	fmt.Println(language.Make("und").Base())
	fmt.Println(language.Make("und-US").Base())
	fmt.Println(language.Make("und-NL").Base())
	fmt.Println(language.Make("und-419").Base()) // Latin America
	fmt.Println(language.Make("und-ZZ").Base())
	// Output:
	// en Low
	// en High
	// nl High
	// es Low
	// en Low
}
// ExampleTag_Script shows that Script reports the inferred script with a
// confidence level, and that an explicitly specified script is Exact.
func ExampleTag_Script() {
	en := language.Make("en")
	sr := language.Make("sr")
	// Renamed from sr_Latn: Go identifiers use mixed caps, not underscores.
	srLatn := language.Make("sr_Latn")
	fmt.Println(en.Script())
	fmt.Println(sr.Script())
	// Was a script explicitly specified?
	_, c := sr.Script()
	fmt.Println(c == language.Exact)
	_, c = srLatn.Script()
	fmt.Println(c == language.Exact)
	// Output:
	// Latn High
	// Cyrl Low
	// false
	// true
}
// ExampleTag_Region shows that Region infers a likely region for a bare
// language tag, with Low confidence.
func ExampleTag_Region() {
	ru := language.Make("ru")
	en := language.Make("en")
	fmt.Println(ru.Region())
	fmt.Println(en.Region())
	// Output:
	// RU Low
	// US Low
}
// ExampleRegion_TLD shows the region-to-ccTLD mapping, including the GB->UK
// special case and how Canonicalize resolves a deprecated code (BU -> MM).
func ExampleRegion_TLD() {
	us := language.MustParseRegion("US")
	gb := language.MustParseRegion("GB")
	uk := language.MustParseRegion("UK")
	bu := language.MustParseRegion("BU")

	fmt.Println(us.TLD())
	fmt.Println(gb.TLD())
	fmt.Println(uk.TLD())
	fmt.Println(bu.TLD())

	fmt.Println(us.Canonicalize().TLD())
	fmt.Println(gb.Canonicalize().TLD())
	fmt.Println(uk.Canonicalize().TLD())
	fmt.Println(bu.Canonicalize().TLD())
	// Output:
	// US <nil>
	// UK <nil>
	// UK <nil>
	// ZZ language: region is not a valid ccTLD
	// US <nil>
	// UK <nil>
	// UK <nil>
	// MM <nil>
}
// ExampleCompose demonstrates combining, replacing, and removing parts of
// language tags with Compose, including its error behavior and the effect of
// the canonicalization type used.
func ExampleCompose() {
	nl, _ := language.ParseBase("nl")
	us, _ := language.ParseRegion("US")
	de := language.Make("de-1901-u-co-phonebk")
	jp := language.Make("ja-JP")
	fi := language.Make("fi-x-ing")

	u, _ := language.ParseExtension("u-nu-arabic")
	x, _ := language.ParseExtension("x-piglatin")

	// Combine a base language and region.
	fmt.Println(language.Compose(nl, us))
	// Combine a base language and extension.
	fmt.Println(language.Compose(nl, x))
	// Replace the region.
	fmt.Println(language.Compose(jp, us))
	// Combine several tags.
	fmt.Println(language.Compose(us, nl, u))

	// Replace the base language of a tag.
	fmt.Println(language.Compose(de, nl))
	fmt.Println(language.Compose(de, nl, u))
	// Remove the base language.
	fmt.Println(language.Compose(de, language.Base{}))
	// Remove all variants.
	fmt.Println(language.Compose(de, []language.Variant{}))
	// Remove all extensions.
	fmt.Println(language.Compose(de, []language.Extension{}))
	fmt.Println(language.Compose(fi, []language.Extension{}))
	// Remove all variants and extensions.
	fmt.Println(language.Compose(de.Raw()))

	// An error is gobbled or returned if non-nil.
	fmt.Println(language.Compose(language.ParseRegion("ZA")))
	fmt.Println(language.Compose(language.ParseRegion("HH")))

	// Compose uses the same Default canonicalization as Make.
	fmt.Println(language.Compose(language.Raw.Parse("en-Latn-UK")))
	// Call compose on a different CanonType for different results.
	fmt.Println(language.All.Compose(language.Raw.Parse("en-Latn-UK")))

	// Output:
	// nl-US <nil>
	// nl-x-piglatin <nil>
	// ja-US <nil>
	// nl-US-u-nu-arabic <nil>
	// nl-1901-u-co-phonebk <nil>
	// nl-1901-u-nu-arabic <nil>
	// und-1901-u-co-phonebk <nil>
	// de-u-co-phonebk <nil>
	// de-1901 <nil>
	// fi <nil>
	// de <nil>
	// und-ZA <nil>
	// und language: subtag "HH" is well-formed but unknown
	// en-Latn-GB <nil>
	// en-GB <nil>
}
// ExampleParse_errors shows how to distinguish well-formed-but-unknown
// subtags (ValueError) from syntax errors when parsing fails.
func ExampleParse_errors() {
	for _, s := range []string{"Foo", "Bar", "Foobar"} {
		_, err := language.Parse(s)
		if err != nil {
			if inv, ok := err.(language.ValueError); ok {
				fmt.Println(inv.Subtag())
			} else {
				fmt.Println(s)
			}
		}
	}
	for _, s := range []string{"en", "aa-Uuuu", "AC", "ac-u"} {
		_, err := language.Parse(s)
		switch e := err.(type) {
		case language.ValueError:
			fmt.Printf("%s: culprit %q\n", s, e.Subtag())
		case nil:
			// No error.
		default:
			// A syntax error.
			fmt.Printf("%s: ill-formed\n", s)
		}
	}
	// Output:
	// foo
	// Foobar
	// aa-Uuuu: culprit "Uuuu"
	// AC: culprit "ac"
	// ac-u: ill-formed
}
// ExampleParent shows the CLDR inheritance parent for a variety of tags.
func ExampleParent() {
	p := func(tag string) {
		fmt.Printf("parent(%v): %v\n", tag, language.Make(tag).Parent())
	}
	p("zh-CN")

	// Australian English inherits from World English.
	p("en-AU")

	// If the tag has a different maximized script from its parent, a tag with
	// this maximized script is inserted. This allows different language tags
	// which have the same base language and script in common to inherit from
	// a common set of settings.
	p("zh-HK")

	// If the maximized script of the parent is not identical, CLDR will skip
	// inheriting from it, as it means there will not be many entries in common
	// and inheriting from it is nonsensical.
	p("zh-Hant")

	// The parent of a tag with variants and extensions is the tag with all
	// variants and extensions removed.
	p("de-1994-u-co-phonebk")

	// Remove default script.
	p("de-Latn-LU")

	// Output:
	// parent(zh-CN): zh
	// parent(en-AU): en-001
	// parent(zh-HK): zh-Hant
	// parent(zh-Hant): und
	// parent(de-1994-u-co-phonebk): de
	// parent(de-Latn-LU): de
}
// ExampleMatcher gives some examples of getting the best match of
// a set of tags to any of the tags of a given set.
func ExampleMatcher() {
	// This is the set of tags from which we want to pick the best match. These
	// can be, for example, the supported languages for some package.
	tags := []language.Tag{
		language.English,
		language.BritishEnglish,
		language.French,
		language.Afrikaans,
		language.BrazilianPortuguese,
		language.EuropeanPortuguese,
		language.Croatian,
		language.SimplifiedChinese,
		language.Raw.Make("iw-IL"),
		language.Raw.Make("iw"),
		language.Raw.Make("he"),
	}
	m := language.NewMatcher(tags)

	// A simple match.
	fmt.Println(m.Match(language.Make("fr")))

	// Australian English is closer to British than American English.
	fmt.Println(m.Match(language.Make("en-AU")))

	// Default to the first tag passed to the Matcher if there is no match.
	fmt.Println(m.Match(language.Make("ar")))

	// Get the default tag.
	fmt.Println(m.Match())

	fmt.Println("----")

	// Someone specifying sr-Latn is probably fine with getting Croatian.
	fmt.Println(m.Match(language.Make("sr-Latn")))

	// We match SimplifiedChinese, but with Low confidence.
	fmt.Println(m.Match(language.TraditionalChinese))

	// Serbian in Latin script is a closer match to Croatian than Traditional
	// Chinese to Simplified Chinese.
	fmt.Println(m.Match(language.TraditionalChinese, language.Make("sr-Latn")))

	fmt.Println("----")

	// In case multiple variants of a language are available, the most spoken
	// variant is typically returned.
	fmt.Println(m.Match(language.Portuguese))

	// Pick the first value passed to Match in case of a tie.
	fmt.Println(m.Match(language.Dutch, language.Make("fr-BE"), language.Make("af-NA")))
	fmt.Println(m.Match(language.Dutch, language.Make("af-NA"), language.Make("fr-BE")))

	fmt.Println("----")

	// If a Matcher is initialized with a language and its deprecated version,
	// it will distinguish between them.
	fmt.Println(m.Match(language.Raw.Make("iw")))

	// However, for non-exact matches, it will treat deprecated versions as
	// equivalent and consider other factors first.
	fmt.Println(m.Match(language.Raw.Make("he-IL")))

	fmt.Println("----")

	// User settings passed to the Unicode extension are ignored for matching
	// and preserved in the returned tag.
	fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("fr-u-cu-frf")))

	// Even if the matching language is different.
	fmt.Println(m.Match(language.Make("de-u-co-phonebk"), language.Make("br-u-cu-frf")))

	// If there is no matching language, the options of the first preferred tag are used.
	fmt.Println(m.Match(language.Make("de-u-co-phonebk")))

	// Output:
	// fr 2 Exact
	// en-GB 1 High
	// en 0 No
	// en 0 No
	// ----
	// hr 6 High
	// zh-Hans 7 Low
	// hr 6 High
	// ----
	// pt-BR 4 High
	// fr 2 High
	// af 3 High
	// ----
	// iw 9 Exact
	// he 10 Exact
	// ----
	// fr-u-cu-frf 2 Exact
	// fr-u-cu-frf 2 High
	// en-u-co-phonebk 0 No

	// TODO: "he" should be "he-u-rg-IL High"
}
// ExampleMatchStrings shows how to pick a supported language directly from
// the raw strings of an HTTP request (a "lang" cookie plus the
// Accept-Language header).
func ExampleMatchStrings() {
	// languages supported by this service:
	matcher := language.NewMatcher([]language.Tag{
		language.English, language.Dutch, language.German,
	})

	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		// NOTE(review): when the cookie is absent, lang is nil; this relies on
		// (*http.Cookie).String being nil-safe and returning "" — confirm.
		lang, _ := r.Cookie("lang")
		tag, _ := language.MatchStrings(matcher, lang.String(), r.Header.Get("Accept-Language"))
		fmt.Println("User language:", tag)
	})
}
// ExampleComprehends illustrates the confidence levels Comprehends reports
// for speaker/alternative language pairs.
func ExampleComprehends() {
	// Various levels of comprehensibility.
	fmt.Println(language.Comprehends(language.English, language.English))
	fmt.Println(language.Comprehends(language.AmericanEnglish, language.BritishEnglish))

	// An explicit Und results in no match.
	fmt.Println(language.Comprehends(language.English, language.Und))

	fmt.Println("----")

	// There is usually no mutual comprehensibility between different scripts.
	fmt.Println(language.Comprehends(language.Make("en-Dsrt"), language.English))

	// One exception is for Traditional versus Simplified Chinese, albeit with
	// a low confidence.
	fmt.Println(language.Comprehends(language.TraditionalChinese, language.SimplifiedChinese))

	fmt.Println("----")

	// A Swiss German speaker will often understand High German.
	fmt.Println(language.Comprehends(language.Make("gsw"), language.Make("de")))

	// The converse is not generally the case.
	fmt.Println(language.Comprehends(language.Make("de"), language.Make("gsw")))

	// Output:
	// Exact
	// High
	// No
	// ----
	// No
	// Low
	// ----
	// High
	// No
}
// ExampleTag_values demonstrates comparing the raw components of tags and
// comparing whole tags for exact equivalence with '=='.
func ExampleTag_values() {
	us := language.MustParseRegion("US")
	en := language.MustParseBase("en")

	lang, _, region := language.AmericanEnglish.Raw()
	fmt.Println(lang == en, region == us)

	lang, _, region = language.BritishEnglish.Raw()
	fmt.Println(lang == en, region == us)

	// Tags can be compared for exact equivalence using '=='.
	// Renamed from en_us: Go identifiers use MixedCaps, not underscores.
	enUS, _ := language.Compose(en, us)
	fmt.Println(enUS == language.AmericanEnglish)

	// Output:
	// true true
	// true false
	// true
}

View file

@ -1497,8 +1497,14 @@ func (b *builder) writeMatchData() {
if desired == supported && desired == "*_*_*" {
continue
}
if desired != supported { // (Weird but correct.)
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
if desired != supported {
// This is now supported by CLDR, but only one case, which
// should already be covered by paradigm locales. For instance,
// test case "und, en, en-GU, en-IN, en-GB ; en-ZA ; en-GB" in
// testdata/CLDRLocaleMatcherTest.txt tests this.
if supported != "en_*_GB" {
log.Fatalf("not supported: desired=%q; supported=%q", desired, supported)
}
continue
}
ri := regionIntelligibility{

View file

@ -49,7 +49,7 @@ func main() {
defer func() {
buf := &bytes.Buffer{}
if _, err = w.WriteGo(buf, "language"); err != nil {
if _, err = w.WriteGo(buf, "language", ""); err != nil {
log.Fatalf("Error formatting file index.go: %v", err)
}

View file

@ -1,48 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language_test
import (
"fmt"
"net/http"
"strings"
"golang.org/x/text/language"
)
// matcher is a language.Matcher configured for all supported languages.
// The first entry (BritishEnglish) is the fallback used when no preferred
// tag matches, as shown by the ExampleParseAcceptLanguage output below.
var matcher = language.NewMatcher([]language.Tag{
	language.BritishEnglish,
	language.Norwegian,
	language.German,
})
// handler is a http.HandlerFunc. It parses the request's Accept-Language
// header, matches the parsed tags against the supported languages, and
// prints the chosen tag along with the parsed tags, their quality weights,
// and any parse error.
func handler(w http.ResponseWriter, r *http.Request) {
	t, q, err := language.ParseAcceptLanguage(r.Header.Get("Accept-Language"))
	// We ignore the error: the default language will be selected for t == nil.
	tag, _, _ := matcher.Match(t...)
	fmt.Printf("%5v (t: %6v; q: %3v; err: %v)\n", tag, t, q, err)
}
// ExampleParseAcceptLanguage feeds several Accept-Language header values
// (including a malformed one) through handler to show parsing and matching.
func ExampleParseAcceptLanguage() {
	for _, al := range []string{
		"nn;q=0.3, en-us;q=0.8, en,",
		"gsw, en;q=0.7, en-US;q=0.8",
		"gsw, nl, da",
		"invalid",
	} {
		// Create dummy request with Accept-Language set and pass it to handler.
		r, _ := http.NewRequest("GET", "example.com", strings.NewReader("Hello"))
		r.Header.Set("Accept-Language", al)
		handler(nil, r)
	}

	// Output:
	// en-GB (t: [    en en-US    nn]; q: [  1 0.8 0.3]; err: <nil>)
	// en-GB (t: [   gsw en-US    en]; q: [  1 0.8 0.7]; err: <nil>)
	//    de (t: [   gsw    nl    da]; q: [  1   1   1]; err: <nil>)
	// en-GB (t: []; q: []; err: language: tag is not well-formed)
}

File diff suppressed because it is too large Load diff

View file

@ -299,6 +299,26 @@ func (t Tag) String() string {
return string(buf[:t.genCoreBytes(buf[:])])
}
// MarshalText implements encoding.TextMarshaler. It serializes the tag to
// its textual form, preferring the stored string, then the bare base
// language when no script or region is set, and otherwise regenerating the
// core tag bytes. It never returns a non-nil error.
func (t Tag) MarshalText() (text []byte, err error) {
	switch {
	case t.str != "":
		text = append(text, t.str...)
	case t.script == 0 && t.region == 0:
		text = append(text, t.lang.String()...)
	default:
		var buf [maxCoreSize]byte
		text = buf[:t.genCoreBytes(buf[:])]
	}
	return text, nil
}
// UnmarshalText implements encoding.TextUnmarshaler. It parses text with
// the Raw canonicalizer, stores the result in t (even on error, matching
// Raw.Parse's partial result), and reports any parse error.
func (t *Tag) UnmarshalText(text []byte) error {
	var err error
	*t, err = Raw.Parse(string(text))
	return err
}
// Base returns the base language of the language tag. If the base language is
// unspecified, an attempt will be made to infer it from the context.
// It uses a variant of CLDR's Add Likely Subtags algorithm. This is subject to change.

View file

@ -1,878 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"reflect"
"testing"
"golang.org/x/text/internal/testtext"
)
// TestTagSize guards against accidental growth of the Tag struct beyond
// 24 bytes.
func TestTagSize(t *testing.T) {
	id := Tag{}
	typ := reflect.TypeOf(id)
	if typ.Size() > 24 {
		t.Errorf("size of Tag was %d; want 24", typ.Size())
	}
}
// TestIsRoot checks that the zero Tag is root and that parsed tags report
// IsRoot exactly when they are "und" with no script, region, or extension.
func TestIsRoot(t *testing.T) {
	loc := Tag{}
	if !loc.IsRoot() {
		t.Errorf("unspecified should be root.")
	}
	for i, tt := range parseTests() {
		loc, _ := Parse(tt.in)
		undef := tt.lang == "und" && tt.script == "" && tt.region == "" && tt.ext == ""
		if loc.IsRoot() != undef {
			t.Errorf("%d: was %v; want %v", i, loc.IsRoot(), undef)
		}
	}
}
// TestEquality verifies a Tag survives round trips through String and
// through Compose unchanged.
// NOTE(review): only parseTests()[48:49] is exercised — this looks like a
// slice narrowed while debugging; confirm whether the full range was
// intended.
func TestEquality(t *testing.T) {
	for i, tt := range parseTests()[48:49] {
		s := tt.in
		tag := Make(s)
		t1 := Make(tag.String())
		if tag != t1 {
			t.Errorf("%d:%s: equality test 1 failed\n got: %#v\nwant: %#v)", i, s, t1, tag)
		}
		t2, _ := Compose(tag)
		if tag != t2 {
			t.Errorf("%d:%s: equality test 2 failed\n got: %#v\nwant: %#v", i, s, t2, tag)
		}
	}
}
// TestMakeString checks that remakeString reconstructs the canonical string
// form after tag fields are copied with setTagsFrom, and that regenerating
// the string stays within the allocation budget.
func TestMakeString(t *testing.T) {
	tests := []struct{ in, out string }{
		{"und", "und"},
		{"und", "und-CW"},
		{"nl", "nl-NL"},
		{"de-1901", "nl-1901"},
		{"de-1901", "de-Arab-1901"},
		{"x-a-b", "de-Arab-x-a-b"},
		{"x-a-b", "x-a-b"},
	}
	for i, tt := range tests {
		id, _ := Parse(tt.in)
		mod, _ := Parse(tt.out)
		id.setTagsFrom(mod)
		// Regenerate twice to check remakeString is idempotent.
		for j := 0; j < 2; j++ {
			id.remakeString()
			if str := id.String(); str != tt.out {
				t.Errorf("%d:%d: found %s; want %s", i, j, id.String(), tt.out)
			}
		}
		// The bytes to string conversion as used in remakeString
		// occasionally measures as more than one alloc, breaking this test.
		// To alleviate this we set the number of runs to more than 1.
		if n := testtext.AllocsPerRun(8, id.remakeString); n > 1 {
			t.Errorf("%d: # allocs got %.1f; want <= 1", i, n)
		}
	}
}
// TestCompactIndex checks the mapping from tags to indexes in the compact
// coreTags table, including tags that have no compact form (ok == false).
func TestCompactIndex(t *testing.T) {
	tests := []struct {
		tag   string
		index int
		ok    bool
	}{
		// TODO: these values will change with each CLDR update. This issue
		// will be solved if we decide to fix the indexes.
		{"und", 0, true},
		{"ca-ES-valencia", 1, true},
		{"ca-ES-valencia-u-va-posix", 0, false},
		{"ca-ES-valencia-u-co-phonebk", 1, true},
		{"ca-ES-valencia-u-co-phonebk-va-posix", 0, false},
		{"x-klingon", 0, false},
		{"en-US", 229, true},
		{"en-US-u-va-posix", 2, true},
		{"en", 133, true},
		{"en-u-co-phonebk", 133, true},
		{"en-001", 134, true},
		{"sh", 0, false}, // We don't normalize.
	}
	for _, tt := range tests {
		x, ok := CompactIndex(Raw.MustParse(tt.tag))
		if x != tt.index || ok != tt.ok {
			t.Errorf("%s: got %d, %v; want %d %v", tt.tag, x, ok, tt.index, tt.ok)
		}
	}
}
// TestBase checks base-language inference and its confidence level for tags
// with missing or partial information.
func TestBase(t *testing.T) {
	tests := []struct {
		loc, lang string
		conf      Confidence
	}{
		{"und", "en", Low},
		{"x-abc", "und", No},
		{"en", "en", Exact},
		{"und-Cyrl", "ru", High},
		// If a region is not included, the official language should be English.
		{"und-US", "en", High},
		// TODO: not-explicitly listed scripts should probably be und, No
		// Modify addTags to return info on how the match was derived.
		// {"und-Aghb", "und", No},
	}
	for i, tt := range tests {
		loc, _ := Parse(tt.loc)
		lang, conf := loc.Base()
		if lang.String() != tt.lang {
			t.Errorf("%d: language was %s; want %s", i, lang, tt.lang)
		}
		if conf != tt.conf {
			t.Errorf("%d: confidence was %d; want %d", i, conf, tt.conf)
		}
	}
}
// TestParseBase checks ParseBase for 2- and 3-letter codes, case folding,
// bibliographic codes, private use, and malformed input, and that the
// result agrees with the base of a tag made from the expected output.
func TestParseBase(t *testing.T) {
	tests := []struct {
		in  string
		out string
		ok  bool
	}{
		{"en", "en", true},
		{"EN", "en", true},
		{"nld", "nl", true},
		{"dut", "dut", true},  // bibliographic
		{"aaj", "und", false}, // unknown
		{"qaa", "qaa", true},
		{"a", "und", false},
		{"", "und", false},
		{"aaaa", "und", false},
	}
	for i, tt := range tests {
		x, err := ParseBase(tt.in)
		if x.String() != tt.out || err == nil != tt.ok {
			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
		}
		if y, _, _ := Raw.Make(tt.out).Raw(); x != y {
			t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
		}
	}
}
// TestScript checks script inference and its confidence level for a variety
// of tags.
func TestScript(t *testing.T) {
	tests := []struct {
		loc, scr string
		conf     Confidence
	}{
		{"und", "Latn", Low},
		{"en-Latn", "Latn", Exact},
		{"en", "Latn", High},
		{"sr", "Cyrl", Low},
		{"kk", "Cyrl", High},
		{"kk-CN", "Arab", Low},
		{"cmn", "Hans", Low},
		{"ru", "Cyrl", High},
		{"ru-RU", "Cyrl", High},
		{"yue", "Hant", Low},
		{"x-abc", "Zzzz", Low},
		{"und-zyyy", "Zyyy", Exact},
	}
	for i, tt := range tests {
		loc, _ := Parse(tt.loc)
		sc, conf := loc.Script()
		if sc.String() != tt.scr {
			t.Errorf("%d:%s: script was %s; want %s", i, tt.loc, sc, tt.scr)
		}
		if conf != tt.conf {
			t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
		}
	}
}
// TestParseScript checks ParseScript for case folding and malformed input,
// and that the result agrees with the script of "und-<out>".
func TestParseScript(t *testing.T) {
	tests := []struct {
		in  string
		out string
		ok  bool
	}{
		{"Latn", "Latn", true},
		{"zzzz", "Zzzz", true},
		{"zyyy", "Zyyy", true},
		{"Latm", "Zzzz", false},
		{"Zzz", "Zzzz", false},
		{"", "Zzzz", false},
		{"Zzzxx", "Zzzz", false},
	}
	for i, tt := range tests {
		x, err := ParseScript(tt.in)
		if x.String() != tt.out || err == nil != tt.ok {
			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, x, err == nil, tt.out, tt.ok)
		}
		if err == nil {
			if _, y, _ := Raw.Make("und-" + tt.out).Raw(); x != y {
				t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, x, y)
			}
		}
	}
}
// TestRegion checks region inference and its confidence level for a variety
// of tags.
func TestRegion(t *testing.T) {
	tests := []struct {
		loc, reg string
		conf     Confidence
	}{
		{"und", "US", Low},
		{"en", "US", Low},
		{"zh-Hant", "TW", Low},
		{"en-US", "US", Exact},
		{"cmn", "CN", Low},
		{"ru", "RU", Low},
		{"yue", "HK", Low},
		{"x-abc", "ZZ", Low},
	}
	for i, tt := range tests {
		loc, _ := Raw.Parse(tt.loc)
		reg, conf := loc.Region()
		if reg.String() != tt.reg {
			t.Errorf("%d:%s: region was %s; want %s", i, tt.loc, reg, tt.reg)
		}
		if conf != tt.conf {
			t.Errorf("%d:%s: confidence was %d; want %d", i, tt.loc, conf, tt.conf)
		}
	}
}
// TestEncodeM49 checks mapping of UN M.49 numeric codes to regions, and
// that every successfully encoded code in 1..1000 maps back to a defined
// M.49 value.
func TestEncodeM49(t *testing.T) {
	tests := []struct {
		m49  int
		code string
		ok   bool
	}{
		{1, "001", true},
		{840, "US", true},
		{899, "ZZ", false},
	}
	for i, tt := range tests {
		if r, err := EncodeM49(tt.m49); r.String() != tt.code || err == nil != tt.ok {
			t.Errorf("%d:%d: was %s, %v; want %s, %v", i, tt.m49, r, err == nil, tt.code, tt.ok)
		}
	}
	for i := 1; i <= 1000; i++ {
		if r, err := EncodeM49(i); err == nil && r.M49() == 0 {
			t.Errorf("%d has no error, but maps to undefined region", i)
		}
	}
}
// TestParseRegion checks ParseRegion for numeric, 2-letter, and 3-letter
// codes as well as malformed input, and that the result agrees with the
// region of "und-<out>".
func TestParseRegion(t *testing.T) {
	tests := []struct {
		in  string
		out string
		ok  bool
	}{
		{"001", "001", true},
		{"840", "US", true},
		{"899", "ZZ", false},
		{"USA", "US", true},
		{"US", "US", true},
		{"BC", "ZZ", false},
		{"C", "ZZ", false},
		{"CCCC", "ZZ", false},
		{"01", "ZZ", false},
	}
	for i, tt := range tests {
		r, err := ParseRegion(tt.in)
		if r.String() != tt.out || err == nil != tt.ok {
			t.Errorf("%d:%s: was %s, %v; want %s, %v", i, tt.in, r, err == nil, tt.out, tt.ok)
		}
		if err == nil {
			if _, _, y := Raw.Make("und-" + tt.out).Raw(); r != y {
				t.Errorf("%d:%s: tag was %s; want %s", i, tt.in, r, y)
			}
		}
	}
}
// TestIsCountry checks IsCountry over countries, groupings, and private-use
// and user-assigned regions.
func TestIsCountry(t *testing.T) {
	tests := []struct {
		reg     string
		country bool
	}{
		{"US", true},
		{"001", false},
		{"958", false},
		{"419", false},
		{"203", true},
		{"020", true},
		{"900", false},
		{"999", false},
		{"QO", false},
		{"EU", false},
		{"AA", false},
		{"XK", true},
	}
	for i, tt := range tests {
		reg, _ := getRegionID([]byte(tt.reg))
		r := Region{reg}
		if r.IsCountry() != tt.country {
			t.Errorf("%d: IsCountry(%s) was %v; want %v", i, tt.reg, r.IsCountry(), tt.country)
		}
	}
}
// TestIsGroup checks IsGroup over the same mix of regions as TestIsCountry;
// the two predicates are not exact complements (e.g. "958" is neither).
func TestIsGroup(t *testing.T) {
	tests := []struct {
		reg   string
		group bool
	}{
		{"US", false},
		{"001", true},
		{"958", false},
		{"419", true},
		{"203", false},
		{"020", false},
		{"900", false},
		{"999", false},
		{"QO", true},
		{"EU", true},
		{"AA", false},
		{"XK", false},
	}
	for i, tt := range tests {
		reg, _ := getRegionID([]byte(tt.reg))
		r := Region{reg}
		if r.IsGroup() != tt.group {
			t.Errorf("%d: IsGroup(%s) was %v; want %v", i, tt.reg, r.IsGroup(), tt.group)
		}
	}
}
// TestContains checks region containment: reflexive, direct, and indirect
// containment, plus pairs with no containment relation.
func TestContains(t *testing.T) {
	tests := []struct {
		enclosing, contained string
		contains             bool
	}{
		// A region contains itself.
		{"US", "US", true},
		{"001", "001", true},

		// Direct containment.
		{"001", "002", true},
		{"039", "XK", true},
		{"150", "XK", true},
		{"EU", "AT", true},
		{"QO", "AQ", true},

		// Indirect containment.
		{"001", "US", true},
		{"001", "419", true},
		{"001", "013", true},

		// No containment.
		{"US", "001", false},
		{"155", "EU", false},
	}
	for i, tt := range tests {
		enc, _ := getRegionID([]byte(tt.enclosing))
		con, _ := getRegionID([]byte(tt.contained))
		r := Region{enc}
		if got := r.Contains(Region{con}); got != tt.contains {
			t.Errorf("%d: %s.Contains(%s) was %v; want %v", i, tt.enclosing, tt.contained, got, tt.contains)
		}
	}
}
// TestRegionCanonicalize checks canonicalization of deprecated region codes
// to their replacements (SU deliberately maps to itself).
func TestRegionCanonicalize(t *testing.T) {
	for i, tt := range []struct{ in, out string }{
		{"UK", "GB"},
		{"TP", "TL"},
		{"QU", "EU"},
		{"SU", "SU"},
		{"VD", "VN"},
		{"DD", "DE"},
	} {
		r := MustParseRegion(tt.in)
		want := MustParseRegion(tt.out)
		if got := r.Canonicalize(); got != want {
			t.Errorf("%d: got %v; want %v", i, got, want)
		}
	}
}
// TestRegionTLD checks the mapping from regions to country-code top-level
// domains, covering every category of ISO reservation and ccTLD status.
func TestRegionTLD(t *testing.T) {
	for _, tt := range []struct {
		in, out string
		ok      bool
	}{
		{"EH", "EH", true},
		{"FR", "FR", true},
		{"TL", "TL", true},

		// In ccTLD before in ISO.
		{"GG", "GG", true},

		// Non-standard assignment of ccTLD to ISO code.
		{"GB", "UK", true},

		// Exceptionally reserved in ISO and valid ccTLD.
		{"UK", "UK", true},
		{"AC", "AC", true},
		{"EU", "EU", true},
		{"SU", "SU", true},

		// Exceptionally reserved in ISO and invalid ccTLD.
		{"CP", "ZZ", false},
		{"DG", "ZZ", false},
		{"EA", "ZZ", false},
		{"FX", "ZZ", false},
		{"IC", "ZZ", false},
		{"TA", "ZZ", false},

		// Transitionally reserved in ISO (e.g. deprecated) but valid ccTLD as
		// it is still being phased out.
		{"AN", "AN", true},
		{"TP", "TP", true},

		// Transitionally reserved in ISO (e.g. deprecated) and invalid ccTLD.
		// Defined in package language as it has a mapping in CLDR.
		{"BU", "ZZ", false},
		{"CS", "ZZ", false},
		{"NT", "ZZ", false},
		{"YU", "ZZ", false},
		{"ZR", "ZZ", false},
		// Not defined in package: SF.

		// Indeterminately reserved in ISO.
		// Defined in package language as it has a legacy mapping in CLDR.
		{"DY", "ZZ", false},
		{"RH", "ZZ", false},
		{"VD", "ZZ", false},
		// Not defined in package: EW, FL, JA, LF, PI, RA, RB, RC, RI, RL, RM,
		// RN, RP, WG, WL, WV, and YV.

		// Not assigned in ISO, but legacy definitions in CLDR.
		{"DD", "ZZ", false},
		{"YD", "ZZ", false},

		// Normal mappings but somewhat special status in ccTLD.
		{"BL", "BL", true},
		{"MF", "MF", true},
		{"BV", "BV", true},
		{"SJ", "SJ", true},

		// Have values when normalized, but not as is.
		{"QU", "ZZ", false},

		// ISO Private Use.
		{"AA", "ZZ", false},
		{"QM", "ZZ", false},
		{"QO", "ZZ", false},
		{"XA", "ZZ", false},
		{"XK", "ZZ", false}, // Sometimes used for Kosovo, but invalid ccTLD.
	} {
		// NOTE(review): no table entry has an empty "in"; this guard appears
		// to be a leftover — confirm before removing.
		if tt.in == "" {
			continue
		}
		r := MustParseRegion(tt.in)
		var want Region
		if tt.out != "ZZ" {
			want = MustParseRegion(tt.out)
		}
		tld, err := r.TLD()
		if got := err == nil; got != tt.ok {
			t.Errorf("error(%v): got %v; want %v", r, got, tt.ok)
		}
		if tld != want {
			t.Errorf("TLD(%v): got %v; want %v", r, tld, want)
		}
	}
}
// TestCanonicalize checks the CanonType options (SuppressScript, Legacy,
// Macro, Deprecated*, CLDR, All), validates internal string offsets, and
// verifies that All.Canonicalize is idempotent over all supported bases.
func TestCanonicalize(t *testing.T) {
	// TODO: do a full test using CLDR data in a separate regression test.
	tests := []struct {
		in, out string
		option  CanonType
	}{
		{"en-Latn", "en", SuppressScript},
		{"sr-Cyrl", "sr-Cyrl", SuppressScript},
		{"sh", "sr-Latn", Legacy},
		{"sh-HR", "sr-Latn-HR", Legacy},
		{"sh-Cyrl-HR", "sr-Cyrl-HR", Legacy},
		{"tl", "fil", Legacy},
		{"no", "no", Legacy},
		{"no", "nb", Legacy | CLDR},
		{"cmn", "cmn", Legacy},
		{"cmn", "zh", Macro},
		{"cmn-u-co-stroke", "zh-u-co-stroke", Macro},
		{"yue", "yue", Macro},
		{"nb", "no", Macro},
		{"nb", "nb", Macro | CLDR},
		{"no", "no", Macro},
		{"no", "no", Macro | CLDR},
		{"iw", "he", DeprecatedBase},
		{"iw", "he", Deprecated | CLDR},
		{"mo", "ro-MD", Deprecated}, // Adopted by CLDR as of version 25.
		{"alb", "sq", Legacy},       // bibliographic
		{"dut", "nl", Legacy},       // bibliographic
		// As of CLDR 25, mo is no longer considered a legacy mapping.
		{"mo", "mo", Legacy | CLDR},
		{"und-AN", "und-AN", Deprecated},
		{"und-YD", "und-YE", DeprecatedRegion},
		{"und-YD", "und-YD", DeprecatedBase},
		{"und-Qaai", "und-Zinh", DeprecatedScript},
		{"und-Qaai", "und-Qaai", DeprecatedBase},
		{"drh", "mn", All}, // drh -> khk -> mn
	}
	for i, tt := range tests {
		in, _ := Raw.Parse(tt.in)
		in, _ = tt.option.Canonicalize(in)
		if in.String() != tt.out {
			t.Errorf("%d:%s: was %s; want %s", i, tt.in, in.String(), tt.out)
		}
		// The variant and extension offsets must stay ordered and in bounds.
		if int(in.pVariant) > int(in.pExt) || int(in.pExt) > len(in.str) {
			t.Errorf("%d:%s:offsets %d <= %d <= %d must be true", i, tt.in, in.pVariant, in.pExt, len(in.str))
		}
	}
	// Test idempotence.
	for _, base := range Supported.BaseLanguages() {
		tag, _ := Raw.Compose(base)
		got, _ := All.Canonicalize(tag)
		want, _ := All.Canonicalize(got)
		if got != want {
			t.Errorf("idem(%s): got %s; want %s", tag, got, want)
		}
	}
}
// TestTypeForKey checks lookup of -u extension key/type pairs, including
// missing keys and private-use tags where lookup fails.
func TestTypeForKey(t *testing.T) {
	tests := []struct{ key, in, out string }{
		{"co", "en", ""},
		{"co", "en-u-abc", ""},
		{"co", "en-u-co-phonebk", "phonebk"},
		{"co", "en-u-co-phonebk-cu-aud", "phonebk"},
		{"co", "x-foo-u-co-phonebk", ""},
		{"nu", "en-u-co-phonebk-nu-arabic", "arabic"},
		{"kc", "cmn-u-co-stroke", ""},
	}
	for _, tt := range tests {
		if v := Make(tt.in).TypeForKey(tt.key); v != tt.out {
			t.Errorf("%q[%q]: was %q; want %q", tt.in, tt.key, v, tt.out)
		}
	}
}
// TestSetTypeForKey checks replacing, adding, and removing -u extension
// key/type pairs, error cases for invalid keys/values, and that the result
// is readable back via TypeForKey — also for tags whose string form has
// not been materialized.
func TestSetTypeForKey(t *testing.T) {
	tests := []struct {
		key, value, in, out string
		err                 bool
	}{
		// replace existing value
		{"co", "pinyin", "en-u-co-phonebk", "en-u-co-pinyin", false},
		{"co", "pinyin", "en-u-co-phonebk-cu-xau", "en-u-co-pinyin-cu-xau", false},
		{"co", "pinyin", "en-u-co-phonebk-v-xx", "en-u-co-pinyin-v-xx", false},
		{"co", "pinyin", "en-u-co-phonebk-x-x", "en-u-co-pinyin-x-x", false},
		{"nu", "arabic", "en-u-co-phonebk-nu-vaai", "en-u-co-phonebk-nu-arabic", false},
		// add to existing -u extension
		{"co", "pinyin", "en-u-ca-gregory", "en-u-ca-gregory-co-pinyin", false},
		{"co", "pinyin", "en-u-ca-gregory-nu-vaai", "en-u-ca-gregory-co-pinyin-nu-vaai", false},
		{"co", "pinyin", "en-u-ca-gregory-v-va", "en-u-ca-gregory-co-pinyin-v-va", false},
		{"co", "pinyin", "en-u-ca-gregory-x-a", "en-u-ca-gregory-co-pinyin-x-a", false},
		{"ca", "gregory", "en-u-co-pinyin", "en-u-ca-gregory-co-pinyin", false},
		// remove pair
		{"co", "", "en-u-co-phonebk", "en", false},
		{"co", "", "en-u-ca-gregory-co-phonebk", "en-u-ca-gregory", false},
		{"co", "", "en-u-co-phonebk-nu-arabic", "en-u-nu-arabic", false},
		{"co", "", "en", "en", false},
		// add -u extension
		{"co", "pinyin", "en", "en-u-co-pinyin", false},
		{"co", "pinyin", "und", "und-u-co-pinyin", false},
		{"co", "pinyin", "en-a-aaa", "en-a-aaa-u-co-pinyin", false},
		{"co", "pinyin", "en-x-aaa", "en-u-co-pinyin-x-aaa", false},
		{"co", "pinyin", "en-v-aa", "en-u-co-pinyin-v-aa", false},
		{"co", "pinyin", "en-a-aaa-x-x", "en-a-aaa-u-co-pinyin-x-x", false},
		{"co", "pinyin", "en-a-aaa-v-va", "en-a-aaa-u-co-pinyin-v-va", false},
		// error on invalid values
		{"co", "pinyinxxx", "en", "en", true},
		{"co", "piny.n", "en", "en", true},
		{"co", "pinyinxxx", "en-a-aaa", "en-a-aaa", true},
		{"co", "pinyinxxx", "en-u-aaa", "en-u-aaa", true},
		{"co", "pinyinxxx", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
		{"co", "pinyi.", "en-u-aaa-co-pinyin", "en-u-aaa-co-pinyin", true},
		{"col", "pinyin", "en", "en", true},
		{"co", "cu", "en", "en", true},
		// error when setting on a private use tag
		{"co", "phonebook", "x-foo", "x-foo", true},
	}
	for i, tt := range tests {
		tag := Make(tt.in)
		if v, err := tag.SetTypeForKey(tt.key, tt.value); v.String() != tt.out {
			t.Errorf("%d:%q[%q]=%q: was %q; want %q", i, tt.in, tt.key, tt.value, v, tt.out)
		} else if (err != nil) != tt.err {
			t.Errorf("%d:%q[%q]=%q: error was %v; want %v", i, tt.in, tt.key, tt.value, err != nil, tt.err)
		} else if val := v.TypeForKey(tt.key); err == nil && val != tt.value {
			t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
		}
		if len(tag.String()) <= 3 {
			// Simulate a tag for which the string has not been set.
			tag.str, tag.pExt, tag.pVariant = "", 0, 0
			if tag, err := tag.SetTypeForKey(tt.key, tt.value); err == nil {
				if val := tag.TypeForKey(tt.key); err == nil && val != tt.value {
					t.Errorf("%d:%q[%q]==%q: was %v; want %v", i, tt.out, tt.key, tt.value, val, tt.value)
				}
			}
		}
	}
}
// TestFindKeyAndType checks the internal findTypeForKey: for a match it
// returns the span of the type; for a miss it returns the insertion point
// and whether a -u extension is present.
func TestFindKeyAndType(t *testing.T) {
	// out is either the matched type in case of a match or the original
	// string up till the insertion point.
	tests := []struct {
		key     string
		hasExt  bool
		in, out string
	}{
		// Don't search past a private use extension.
		{"co", false, "en-x-foo-u-co-pinyin", "en"},
		{"co", false, "x-foo-u-co-pinyin", ""},
		{"co", false, "en-s-fff-x-foo", "en-s-fff"},
		// Insertion points in absence of -u extension.
		{"cu", false, "en", ""}, // t.str is ""
		{"cu", false, "en-v-va", "en"},
		{"cu", false, "en-a-va", "en-a-va"},
		{"cu", false, "en-a-va-v-va", "en-a-va"},
		{"cu", false, "en-x-a", "en"},
		// Tags with the -u extension.
		{"co", true, "en-u-co-standard", "standard"},
		{"co", true, "yue-u-co-pinyin", "pinyin"},
		{"co", true, "en-u-co-abc", "abc"},
		{"co", true, "en-u-co-abc-def", "abc-def"},
		{"co", true, "en-u-co-abc-def-x-foo", "abc-def"},
		{"co", true, "en-u-co-standard-nu-arab", "standard"},
		{"co", true, "yue-u-co-pinyin-nu-arab", "pinyin"},
		// Insertion points.
		{"cu", true, "en-u-co-standard", "en-u-co-standard"},
		{"cu", true, "yue-u-co-pinyin-x-foo", "yue-u-co-pinyin"},
		{"cu", true, "en-u-co-abc", "en-u-co-abc"},
		{"cu", true, "en-u-nu-arabic", "en-u"},
		{"cu", true, "en-u-co-abc-def-nu-arabic", "en-u-co-abc-def"},
	}
	for i, tt := range tests {
		start, end, hasExt := Make(tt.in).findTypeForKey(tt.key)
		if start != end {
			res := tt.in[start:end]
			if res != tt.out {
				t.Errorf("%d:%s: was %q; want %q", i, tt.in, res, tt.out)
			}
		} else {
			if hasExt != tt.hasExt {
				t.Errorf("%d:%s: hasExt was %v; want %v", i, tt.in, hasExt, tt.hasExt)
				continue
			}
			if tt.in[:start] != tt.out {
				t.Errorf("%d:%s: insertion point was %q; want %q", i, tt.in, tt.in[:start], tt.out)
			}
		}
	}
}
// TestParent checks Parent against the CLDR inheritance rules: variants,
// extensions, and superfluous scripts are stripped; non-default scripts
// skip to und; and region-specific parents (en-001, es-419, pt-PT) apply.
func TestParent(t *testing.T) {
	tests := []struct{ in, out string }{
		// Strip variants and extensions first
		{"de-u-co-phonebk", "de"},
		{"de-1994", "de"},
		{"de-Latn-1994", "de"}, // remove superfluous script.

		// Ensure the canonical Tag for an entry is in the chain for base-script
		// pairs.
		{"zh-Hans", "zh"},

		// Skip the script if it is the maximized version. CLDR files for the
		// skipped tag are always empty.
		{"zh-Hans-TW", "zh"},
		{"zh-Hans-CN", "zh"},

		// Insert the script if the maximized script is not the same as the
		// maximized script of the base language.
		{"zh-TW", "zh-Hant"},
		{"zh-HK", "zh-Hant"},
		{"zh-Hant-TW", "zh-Hant"},
		{"zh-Hant-HK", "zh-Hant"},

		// Non-default script skips to und.
		// CLDR
		{"az-Cyrl", "und"},
		{"bs-Cyrl", "und"},
		{"en-Dsrt", "und"},
		{"ha-Arab", "und"},
		{"mn-Mong", "und"},
		{"pa-Arab", "und"},
		{"shi-Latn", "und"},
		{"sr-Latn", "und"},
		{"uz-Arab", "und"},
		{"uz-Cyrl", "und"},
		{"vai-Latn", "und"},
		{"zh-Hant", "und"},
		// extra
		{"nl-Cyrl", "und"},

		// World english inherits from en-001.
		{"en-150", "en-001"},
		{"en-AU", "en-001"},
		{"en-BE", "en-001"},
		{"en-GG", "en-001"},
		{"en-GI", "en-001"},
		{"en-HK", "en-001"},
		{"en-IE", "en-001"},
		{"en-IM", "en-001"},
		{"en-IN", "en-001"},
		{"en-JE", "en-001"},
		{"en-MT", "en-001"},
		{"en-NZ", "en-001"},
		{"en-PK", "en-001"},
		{"en-SG", "en-001"},

		// Spanish in Latin-American countries have es-419 as parent.
		{"es-AR", "es-419"},
		{"es-BO", "es-419"},
		{"es-CL", "es-419"},
		{"es-CO", "es-419"},
		{"es-CR", "es-419"},
		{"es-CU", "es-419"},
		{"es-DO", "es-419"},
		{"es-EC", "es-419"},
		{"es-GT", "es-419"},
		{"es-HN", "es-419"},
		{"es-MX", "es-419"},
		{"es-NI", "es-419"},
		{"es-PA", "es-419"},
		{"es-PE", "es-419"},
		{"es-PR", "es-419"},
		{"es-PY", "es-419"},
		{"es-SV", "es-419"},
		{"es-US", "es-419"},
		{"es-UY", "es-419"},
		{"es-VE", "es-419"},
		// exceptions (according to CLDR)
		{"es-CW", "es"},

		// Inherit from pt-PT, instead of pt for these countries.
		{"pt-AO", "pt-PT"},
		{"pt-CV", "pt-PT"},
		{"pt-GW", "pt-PT"},
		{"pt-MO", "pt-PT"},
		{"pt-MZ", "pt-PT"},
		{"pt-ST", "pt-PT"},
		{"pt-TL", "pt-PT"},
	}
	for _, tt := range tests {
		tag := Raw.MustParse(tt.in)
		if p := Raw.MustParse(tt.out); p != tag.Parent() {
			t.Errorf("%s: was %v; want %v", tt.in, tag.Parent(), p)
		}
	}
}
// Benchmark corpora for the Parse benchmarks below, grouped by the kind of
// work Parse must do for each tag.
var (
	// Tags without error that don't need to be changed.
	benchBasic = []string{
		"en",
		"en-Latn",
		"en-GB",
		"za",
		"zh-Hant",
		"zh",
		"zh-HK",
		"ar-MK",
		"en-CA",
		"fr-CA",
		"fr-CH",
		"fr",
		"lv",
		"he-IT",
		"tlh",
		"ja",
		"ja-Jpan",
		"ja-Jpan-JP",
		"de-1996",
		"de-CH",
		"sr",
		"sr-Latn",
	}
	// Tags with extensions, not changes required.
	benchExt = []string{
		"x-a-b-c-d",
		"x-aa-bbbb-cccccccc-d",
		"en-x_cc-b-bbb-a-aaa",
		"en-c_cc-b-bbb-a-aaa-x-x",
		"en-u-co-phonebk",
		"en-Cyrl-u-co-phonebk",
		"en-US-u-co-phonebk-cu-xau",
		"en-nedix-u-co-phonebk",
		"en-t-t0-abcd",
		"en-t-nl-latn",
		"en-t-t0-abcd-x-a",
	}
	// Change, but not memory allocation required.
	benchSimpleChange = []string{
		"EN",
		"i-klingon",
		"en-latn",
		"zh-cmn-Hans-CN",
		"iw-NL",
	}
	// Change and memory allocation required.
	benchChangeAlloc = []string{
		"en-c_cc-b-bbb-a-aaa",
		"en-u-cu-xua-co-phonebk",
		"en-u-cu-xua-co-phonebk-a-cd",
		"en-u-def-abc-cu-xua-co-phonebk",
		"en-t-en-Cyrl-NL-1994",
		"en-t-en-Cyrl-NL-1994-t0-abc-def",
	}
	// Tags that result in errors.
	benchErr = []string{
		// IllFormed
		"x_A.-B-C_D",
		"en-u-cu-co-phonebk",
		"en-u-cu-xau-co",
		"en-t-nl-abcd",
		// Invalid
		"xx",
		"nl-Uuuu",
		"nl-QB",
	}
	benchChange = append(benchSimpleChange, benchChangeAlloc...)
	benchAll    = append(append(append(benchBasic, benchExt...), benchChange...), benchErr...)
)
// doParse runs Parse b.N times over the given corpus, cycling through the
// tags so ns/op reflects a single Parse call.
func doParse(b *testing.B, tag []string) {
	for i := 0; i < b.N; i++ {
		// Use the modulo instead of looping over all tags so that we get a somewhat
		// meaningful ns/op.
		Parse(tag[i%len(tag)])
	}
}
// BenchmarkParse measures Parse over the full mix of corpora.
func BenchmarkParse(b *testing.B) {
	doParse(b, benchAll)
}
// BenchmarkParseBasic measures Parse over tags needing no changes.
func BenchmarkParseBasic(b *testing.B) {
	doParse(b, benchBasic)
}
// BenchmarkParseError measures Parse over ill-formed and invalid tags.
func BenchmarkParseError(b *testing.B) {
	doParse(b, benchErr)
}
// BenchmarkParseSimpleChange measures Parse over tags needing allocation-free
// normalization.
func BenchmarkParseSimpleChange(b *testing.B) {
	doParse(b, benchSimpleChange)
}
// BenchmarkParseChangeAlloc measures Parse over tags whose normalization
// requires allocation.
func BenchmarkParseChangeAlloc(b *testing.B) {
	doParse(b, benchChangeAlloc)
}

View file

@ -1,457 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"testing"
"golang.org/x/text/internal/tag"
)
// b is shorthand used throughout these tests for converting a string to
// its []byte representation.
func b(s string) []byte {
	return append([]byte(nil), s...)
}
// TestLangID cross-checks language ID lookup via BCP 47 codes, ISO 639-2/3
// codes, and normalization (normLang), plus String and ISO3 round trips.
// A 4-character iso3 entry (e.g. "yid-") is excluded from the 3-letter
// lookups by the len(tt.iso3) == 3 guard.
func TestLangID(t *testing.T) {
	tests := []struct {
		id, bcp47, iso3, norm string
		err                   error
	}{
		{id: "", bcp47: "und", iso3: "und", err: errSyntax},
		{id: " ", bcp47: "und", iso3: "und", err: errSyntax},
		{id: " ", bcp47: "und", iso3: "und", err: errSyntax},
		{id: " ", bcp47: "und", iso3: "und", err: errSyntax},
		{id: "xxx", bcp47: "und", iso3: "und", err: mkErrInvalid([]byte("xxx"))},
		{id: "und", bcp47: "und", iso3: "und"},
		{id: "aju", bcp47: "aju", iso3: "aju", norm: "jrb"},
		{id: "jrb", bcp47: "jrb", iso3: "jrb"},
		{id: "es", bcp47: "es", iso3: "spa"},
		{id: "spa", bcp47: "es", iso3: "spa"},
		{id: "ji", bcp47: "ji", iso3: "yid-", norm: "yi"},
		{id: "jw", bcp47: "jw", iso3: "jav-", norm: "jv"},
		{id: "ar", bcp47: "ar", iso3: "ara"},
		{id: "kw", bcp47: "kw", iso3: "cor"},
		{id: "arb", bcp47: "arb", iso3: "arb", norm: "ar"},
		{id: "ar", bcp47: "ar", iso3: "ara"},
		{id: "kur", bcp47: "ku", iso3: "kur"},
		{id: "nl", bcp47: "nl", iso3: "nld"},
		{id: "NL", bcp47: "nl", iso3: "nld"},
		{id: "gsw", bcp47: "gsw", iso3: "gsw"},
		{id: "gSW", bcp47: "gsw", iso3: "gsw"},
		{id: "und", bcp47: "und", iso3: "und"},
		{id: "sh", bcp47: "sh", iso3: "hbs", norm: "sr"},
		{id: "hbs", bcp47: "sh", iso3: "hbs", norm: "sr"},
		{id: "no", bcp47: "no", iso3: "nor", norm: "no"},
		{id: "nor", bcp47: "no", iso3: "nor", norm: "no"},
		{id: "cmn", bcp47: "cmn", iso3: "cmn", norm: "zh"},
	}
	for i, tt := range tests {
		want, err := getLangID(b(tt.id))
		if err != tt.err {
			t.Errorf("%d:err(%s): found %q; want %q", i, tt.id, err, tt.err)
		}
		if err != nil {
			continue
		}
		if id, _ := getLangISO2(b(tt.bcp47)); len(tt.bcp47) == 2 && want != id {
			t.Errorf("%d:getISO2(%s): found %v; want %v", i, tt.bcp47, id, want)
		}
		if len(tt.iso3) == 3 {
			if id, _ := getLangISO3(b(tt.iso3)); want != id {
				t.Errorf("%d:getISO3(%s): found %q; want %q", i, tt.iso3, id, want)
			}
			if id, _ := getLangID(b(tt.iso3)); want != id {
				t.Errorf("%d:getID3(%s): found %v; want %v", i, tt.iso3, id, want)
			}
		}
		norm := want
		if tt.norm != "" {
			norm, _ = getLangID(b(tt.norm))
		}
		id, _ := normLang(want)
		if id != norm {
			t.Errorf("%d:norm(%s): found %v; want %v", i, tt.id, id, norm)
		}
		if id := want.String(); tt.bcp47 != id {
			t.Errorf("%d:String(): found %s; want %s", i, id, tt.bcp47)
		}
		if id := want.ISO3(); tt.iso3[:3] != id {
			t.Errorf("%d:iso3(): found %s; want %s", i, id, tt.iso3[:3])
		}
	}
}
// TestGrandfathered checks that grandfathered BCP 47 tags, as well as a few
// legacy underscore-separated forms, are canonicalized to their modern
// replacements by Raw.Make.
func TestGrandfathered(t *testing.T) {
	cases := []struct{ in, out string }{
		{"art-lojban", "jbo"},
		{"i-ami", "ami"},
		{"i-bnn", "bnn"},
		{"i-hak", "hak"},
		{"i-klingon", "tlh"},
		{"i-lux", "lb"},
		{"i-navajo", "nv"},
		{"i-pwn", "pwn"},
		{"i-tao", "tao"},
		{"i-tay", "tay"},
		{"i-tsu", "tsu"},
		{"no-bok", "nb"},
		{"no-nyn", "nn"},
		{"sgn-BE-FR", "sfb"},
		{"sgn-BE-NL", "vgt"},
		{"sgn-CH-DE", "sgg"},
		{"sgn-ch-de", "sgg"},
		{"zh-guoyu", "cmn"},
		{"zh-hakka", "hak"},
		{"zh-min-nan", "nan"},
		{"zh-xiang", "hsn"},

		// Grandfathered tags with no modern replacement will be converted as follows:
		{"cel-gaulish", "xtg-x-cel-gaulish"},
		{"en-GB-oed", "en-GB-oxendict"},
		{"en-gb-oed", "en-GB-oxendict"},
		{"i-default", "en-x-i-default"},
		{"i-enochian", "und-x-i-enochian"},
		{"i-mingo", "see-x-i-mingo"},
		{"zh-min", "nan-x-zh-min"},

		{"root", "und"},
		{"en_US_POSIX", "en-US-u-va-posix"},
		{"en_us_posix", "en-US-u-va-posix"},
		{"en-us-posix", "en-US-u-va-posix"},
	}
	for _, c := range cases {
		got, want := Raw.Make(c.in), Raw.MustParse(c.out)
		if got != want {
			t.Errorf("%s: got %q; want %q", c.in, got, want)
		}
	}
}
// TestRegionID checks parsing of region subtags via getRegionID and
// getRegionISO2. Inputs prefixed with '_' are expected to fail to parse;
// all others must round-trip to the expected canonical string.
func TestRegionID(t *testing.T) {
	tests := []struct {
		in, out string
	}{
		{"_  ", ""},
		{"_000", ""},
		{"419", "419"},
		{"AA", "AA"},
		{"ATF", "TF"},
		{"HV", "HV"},
		{"CT", "CT"},
		{"DY", "DY"},
		{"IC", "IC"},
		{"FQ", "FQ"},
		{"JT", "JT"},
		{"ZZ", "ZZ"},
		{"EU", "EU"},
		{"QO", "QO"},
		{"FX", "FX"},
	}
	for i, tc := range tests {
		// A leading underscore marks an input that must be rejected.
		if tc.in[0] == '_' {
			id := tc.in[1:]
			if _, err := getRegionID(b(id)); err == nil {
				t.Errorf("%d:err(%s): found nil; want error", i, id)
			}
			continue
		}
		r, _ := getRegionID(b(tc.in))
		if got := r.String(); got != tc.out {
			t.Errorf("%d:%s: found %q; want %q", i, tc.in, got, tc.out)
		}
		if len(tc.in) != 2 {
			continue
		}
		// Two-letter codes must also resolve through the ISO-2 index.
		r2, _ := getRegionISO2(b(tc.in))
		if got := r2.String(); got != tc.out {
			t.Errorf("%d:getISO2(%s): found %q; want %q", i, tc.in, got, tc.out)
		}
	}
}
// TestRegionType verifies the classification bits (bcp47Region, ccTLD,
// iso3166UserAssigned) reported by regionID.typ for a selection of regular,
// reserved, deleted, and user-assigned region codes.
func TestRegionType(t *testing.T) {
	cases := []struct {
		r string
		t byte
	}{
		{"NL", bcp47Region | ccTLD},
		{"EU", bcp47Region | ccTLD}, // exceptionally reserved
		{"AN", bcp47Region | ccTLD}, // transitionally reserved

		{"DD", bcp47Region}, // deleted in ISO, deprecated in BCP 47
		{"NT", bcp47Region}, // transitionally reserved, deprecated in BCP 47

		{"XA", iso3166UserAssigned | bcp47Region},
		{"ZZ", iso3166UserAssigned | bcp47Region},
		{"AA", iso3166UserAssigned | bcp47Region},
		{"QO", iso3166UserAssigned | bcp47Region},
		{"QM", iso3166UserAssigned | bcp47Region},
		{"XK", iso3166UserAssigned | bcp47Region},

		{"CT", 0}, // deleted in ISO, not in BCP 47, canonicalized in CLDR
	}
	for _, c := range cases {
		reg := MustParseRegion(c.r)
		if got := reg.typ(); got != c.t {
			t.Errorf("Type(%s): got %x; want %x", c.r, got, c.t)
		}
	}
}
// TestRegionISO3 checks the mapping between internal region IDs and 3-letter
// ISO 3166 codes in both directions. Table columns:
//   - from: the input region code (2-letter, 3-digit M49, or garbage);
//   - iso3: the 3-letter code expected from regionID.ISO3 ("ZZZ" when the
//     region has no 3-letter code);
//   - to:   the region the 3-letter code is expected to map back to; ""
//     means it maps back to `from` itself.
func TestRegionISO3(t *testing.T) {
	tests := []struct {
		from, iso3, to string
	}{
		{"  ", "ZZZ", "ZZ"},
		{"000", "ZZZ", "ZZ"},
		{"AA", "AAA", ""},
		{"CT", "CTE", ""},
		{"DY", "DHY", ""},
		{"EU", "QUU", ""},
		{"HV", "HVO", ""},
		{"IC", "ZZZ", "ZZ"},
		{"JT", "JTN", ""},
		{"PZ", "PCZ", ""},
		{"QU", "QUU", "EU"},
		{"QO", "QOO", ""},
		{"YD", "YMD", ""},
		{"FQ", "ATF", "TF"},
		{"TF", "ATF", ""},
		{"FX", "FXX", ""},
		{"ZZ", "ZZZ", ""},
		{"419", "ZZZ", "ZZ"},
	}
	for _, tt := range tests {
		// Forward: region -> 3-letter code.
		r, _ := getRegionID(b(tt.from))
		if s := r.ISO3(); s != tt.iso3 {
			t.Errorf("iso3(%q): found %q; want %q", tt.from, s, tt.iso3)
		}
		if tt.iso3 == "" {
			continue
		}
		// Reverse: 3-letter code -> region; expect `to` (or `from` if empty).
		want := tt.to
		if tt.to == "" {
			want = tt.from
		}
		r, _ = getRegionID(b(want))
		if id, _ := getRegionISO3(b(tt.iso3)); id != r {
			t.Errorf("%s: found %q; want %q", tt.iso3, id, want)
		}
	}
}
// TestRegionM49 checks the mapping between UN M49 numeric codes and region
// IDs in both directions. The first table drives getRegionM49 (M49 -> region,
// where id == "" means the code must produce an error); the second table
// drives regionID.M49 (region -> M49 number).
func TestRegionM49(t *testing.T) {
	fromTests := []struct {
		m49 int
		id  string
	}{
		{0, ""},
		{-1, ""},
		{1000, ""},
		{10000, ""},

		{001, "001"},
		{104, "MM"},
		{180, "CD"},
		{230, "ET"},
		{231, "ET"},
		{249, "FX"},
		{250, "FR"},
		{276, "DE"},
		{278, "DD"},
		{280, "DE"},
		{419, "419"},
		{626, "TL"},
		{736, "SD"},
		{840, "US"},
		{854, "BF"},
		{891, "CS"},
		{899, ""},
		{958, "AA"},
		{966, "QT"},
		{967, "EU"},
		{999, "ZZ"},
	}
	for _, tt := range fromTests {
		id, err := getRegionM49(tt.m49)
		// An empty expected id means the lookup must fail.
		if want, have := err != nil, tt.id == ""; want != have {
			t.Errorf("error(%d): have %v; want %v", tt.m49, have, want)
			continue
		}
		r, _ := getRegionID(b(tt.id))
		if r != id {
			t.Errorf("region(%d): have %s; want %s", tt.m49, id, r)
		}
	}

	toTests := []struct {
		m49 int
		id  string
	}{
		{0, "000"},
		{0, "IC"}, // Some codes don't have an ID

		{001, "001"},
		{104, "MM"},
		{104, "BU"},
		{180, "CD"},
		{180, "ZR"},
		{231, "ET"},
		{250, "FR"},
		{249, "FX"},
		{276, "DE"},
		{278, "DD"},
		{419, "419"},
		{626, "TL"},
		{626, "TP"},
		{729, "SD"},
		{826, "GB"},
		{840, "US"},
		{854, "BF"},
		{891, "YU"},
		{891, "CS"},
		{958, "AA"},
		{966, "QT"},
		{967, "EU"},
		{967, "QU"},
		{999, "ZZ"},
		// For codes that don't have an M49 code use the replacement value,
		// if available.
		{854, "HV"}, // maps to Burkina Faso
	}
	for _, tt := range toTests {
		r, _ := getRegionID(b(tt.id))
		if r.M49() != tt.m49 {
			t.Errorf("m49(%q): have %d; want %d", tt.id, r.M49(), tt.m49)
		}
	}
}
// TestRegionDeprecation verifies that normRegion maps deprecated region
// codes to their modern replacements, and returns 0 for codes that are not
// deprecated (such as the final "NL" entry).
func TestRegionDeprecation(t *testing.T) {
	tests := []struct{ in, out string }{
		{"BU", "MM"},
		{"BUR", "MM"},
		{"CT", "KI"},
		{"DD", "DE"},
		{"DDR", "DE"},
		{"DY", "BJ"},
		{"FX", "FR"},
		{"HV", "BF"},
		{"JT", "UM"},
		{"MI", "UM"},
		{"NH", "VU"},
		{"NQ", "AQ"},
		{"PU", "UM"},
		{"PZ", "PA"},
		{"QU", "EU"},
		{"RH", "ZW"},
		{"TP", "TL"},
		{"UK", "GB"},
		{"VD", "VN"},
		{"WK", "UM"},
		{"YD", "YE"},
		{"NL", "NL"},
	}
	for _, tc := range tests {
		in, _ := getRegionID([]byte(tc.in))
		want, _ := getRegionISO2([]byte(tc.out))
		got := normRegion(in)
		if want == in {
			// Non-deprecated regions must normalize to 0 (no replacement).
			if got != 0 {
				t.Errorf("%s: was %q; want %q", tc.in, got, tc.in)
			}
		} else if got != want {
			t.Errorf("%s: was %q; want %q", tc.in, got, tc.out)
		}
	}
}
// TestGetScriptID exercises getScriptID against a small synthetic script
// index. An expected ID of 0 means the input is invalid or absent and the
// lookup must report an error.
func TestGetScriptID(t *testing.T) {
	idx := tag.Index("0000BbbbDdddEeeeZzzz\xff\xff\xff\xff")
	tests := []struct {
		in  string
		out scriptID
	}{
		{"  ", 0},
		{"   ", 0},
		{" ", 0},
		{"", 0},
		{"Aaaa", 0},
		{"Bbbb", 1},
		{"Dddd", 2},
		{"dddd", 2},
		{"dDDD", 2},
		{"Eeee", 3},
		{"Zzzz", 4},
	}
	for i, tt := range tests {
		id, err := getScriptID(idx, b(tt.in))
		switch {
		case id != tt.out:
			t.Errorf("%d:%s: found %d; want %d", i, tt.in, id, tt.out)
		case id == 0 && err == nil:
			// ID 0 is the not-found value; it must come with an error.
			t.Errorf("%d:%s: no error; expected one", i, tt.in)
		}
	}
}
// TestIsPrivateUse verifies the IsPrivateUse predicate for the three subtag
// kinds in turn: language IDs, region IDs, and script IDs.
func TestIsPrivateUse(t *testing.T) {
	type test struct {
		s       string
		private bool
	}
	// Language subtags: qaa..qtz is the BCP 47 private-use range.
	tests := []test{
		{"en", false},
		{"und", false},
		{"pzn", false},
		{"qaa", true},
		{"qtz", true},
		{"qua", false},
	}
	for i, tt := range tests {
		x, _ := getLangID([]byte(tt.s))
		if b := x.IsPrivateUse(); b != tt.private {
			t.Errorf("%d: langID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
		}
	}
	// Region subtags: AA, QM..QZ, XA..XZ, ZZ and M49 codes 958..998 are
	// user-assigned; some are given CLDR meanings (see inline notes).
	tests = []test{
		{"001", false},
		{"419", false},
		{"899", false},
		{"900", false},
		{"957", false},
		{"958", true},
		{"AA", true},
		{"AC", false},
		{"EU", false}, // CLDR grouping, exceptionally reserved in ISO.
		{"QU", true},  // Canonicalizes to EU, User-assigned in ISO.
		{"QO", true},  // CLDR grouping, User-assigned in ISO.
		{"QA", false},
		{"QM", true},
		{"QZ", true},
		{"XA", true},
		{"XK", true}, // Assigned to Kosovo in CLDR, User-assigned in ISO.
		{"XZ", true},
		{"ZW", false},
		{"ZZ", true},
	}
	for i, tt := range tests {
		x, _ := getRegionID([]byte(tt.s))
		if b := x.IsPrivateUse(); b != tt.private {
			t.Errorf("%d: regionID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
		}
	}
	// Script subtags: Qaaa..Qabx is the ISO 15924 private-use range.
	tests = []test{
		{"Latn", false},
		{"Laaa", false}, // invalid
		{"Qaaa", true},
		{"Qabx", true},
		{"Qaby", false},
		{"Zyyy", false},
		{"Zzzz", false},
	}
	for i, tt := range tests {
		x, _ := getScriptID(script, []byte(tt.s))
		if b := x.IsPrivateUse(); b != tt.private {
			t.Errorf("%d: scriptID.IsPrivateUse(%s) was %v; want %v", i, tt.s, b, tt.private)
		}
	}
}

View file

@ -1,505 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"flag"
"fmt"
"os"
"path"
"path/filepath"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
var verbose = flag.Bool("verbose", false, "set to true to print the internal tables of matchers")
// TestCompliance runs the matcher over the data files under testdata. Each
// record lists supported tags, desired tags, the expected best match, and
// the expected combined tag; entries listed in skip are ignored.
func TestCompliance(t *testing.T) {
	err := filepath.Walk("testdata", func(file string, info os.FileInfo, err error) error {
		// Propagate traversal errors: info may be nil when err != nil, so it
		// must be checked before info is dereferenced.
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		r, err := os.Open(file)
		if err != nil {
			t.Fatal(err)
		}
		// Close the data file when this callback returns so handles are not
		// leaked across the walked files.
		defer r.Close()
		ucd.Parse(r, func(p *ucd.Parser) {
			// Subtest name: "supported/desired" with spaces removed.
			name := strings.Replace(path.Join(p.String(0), p.String(1)), " ", "", -1)
			if skip[name] {
				return
			}
			t.Run(info.Name()+"/"+name, func(t *testing.T) {
				supported := makeTagList(p.String(0))
				desired := makeTagList(p.String(1))
				gotCombined, index, conf := NewMatcher(supported).Match(desired...)
				gotMatch := supported[index]
				wantMatch := mk(p.String(2))
				if gotMatch != wantMatch {
					t.Fatalf("match: got %q; want %q (%v)", gotMatch, wantMatch, conf)
				}
				// The combined column may be absent or unparsable; only
				// compare when it parses cleanly.
				wantCombined, err := Raw.Parse(p.String(3))
				if err == nil && gotCombined != wantCombined {
					t.Errorf("combined: got %q; want %q (%v)", gotCombined, wantCombined, conf)
				}
			})
		})
		return nil
	})
	if err != nil {
		t.Fatal(err)
	}
}
// skip lists compliance test cases ("supported/desired" with spaces removed,
// matching the subtest naming in TestCompliance) that are known to disagree
// with this implementation, either because of open bugs or because of
// deliberate deviations from the reference implementation.
var skip = map[string]bool{
	// TODO: bugs
	// Honor the wildcard match. This may only be useful to select non-exact
	// stuff.
	"mul,af/nl": true, // match: got "af"; want "mul"

	// TODO: include other extensions.
	// combined: got "en-GB-u-ca-buddhist-nu-arab"; want "en-GB-fonipa-t-m0-iso-i0-pinyin-u-ca-buddhist-nu-arab"
	"und,en-GB-u-sd-gbsct/en-fonipa-u-nu-Arab-ca-buddhist-t-m0-iso-i0-pinyin": true,

	// Inconsistencies with Mark Davis' implementation where it is not clear
	// which is better.

	// Inconsistencies in combined. I think the Go approach is more appropriate.
	// We could use -u-rg- and -u-va- as alternative.
	"und,fr/fr-BE-fonipa":              true, // combined: got "fr"; want "fr-BE-fonipa"
	"und,fr-CA/fr-BE-fonipa":           true, // combined: got "fr-CA"; want "fr-BE-fonipa"
	"und,fr-fonupa/fr-BE-fonipa":       true, // combined: got "fr-fonupa"; want "fr-BE-fonipa"
	"und,no/nn-BE-fonipa":              true, // combined: got "no"; want "no-BE-fonipa"
	"50,und,fr-CA-fonupa/fr-BE-fonipa": true, // combined: got "fr-CA-fonupa"; want "fr-BE-fonipa"

	// The initial number is a threshold. As we don't use scoring, we will not
	// implement this.
	"50,und,fr-Cyrl-CA-fonupa/fr-BE-fonipa": true,
	// match: got "und"; want "fr-Cyrl-CA-fonupa"
	// combined: got "und"; want "fr-Cyrl-BE-fonipa"

	// Other interesting cases to test:
	// - Should same language or same script have the preference if there is
	//   usually no understanding of the other script?
	// - More specific region in desired may replace enclosing supported.
}
// makeTagList splits a comma-separated list of language tags and parses each
// (whitespace-trimmed) element with mk.
func makeTagList(s string) []Tag {
	parts := strings.Split(s, ",")
	tags := make([]Tag, 0, len(parts))
	for _, p := range parts {
		tags = append(tags, mk(strings.TrimSpace(p)))
	}
	return tags
}
// TestMatchStrings verifies MatchStrings, which matches a sequence of
// Accept-Language-style strings against a matcher built from a
// comma-separated list of supported tags.
func TestMatchStrings(t *testing.T) {
	testCases := []struct {
		supported string
		desired   string // strings separated by |
		tag       string
		index     int
	}{{
		supported: "en",
		desired:   "",
		tag:       "en",
		index:     0,
	}, {
		supported: "en",
		desired:   "nl",
		tag:       "en",
		index:     0,
	}, {
		supported: "en,nl",
		desired:   "nl",
		tag:       "nl",
		index:     1,
	}, {
		supported: "en,nl",
		desired:   "nl|en",
		tag:       "nl",
		index:     1,
	}, {
		// q-weights and surrounding whitespace are honored.
		supported: "en-GB,nl",
		desired:   "en ; q=0.1,nl",
		tag:       "nl",
		index:     1,
	}, {
		supported: "en-GB,nl",
		desired:   "en;q=0.005 | dk; q=0.1,nl ",
		tag:       "en-GB",
		index:     0,
	}, {
		// do not match faulty tags with und
		supported: "en,und",
		desired:   "|en",
		tag:       "en",
		index:     0,
	}}
	for _, tc := range testCases {
		t.Run(path.Join(tc.supported, tc.desired), func(t *testing.T) {
			m := NewMatcher(makeTagList(tc.supported))
			tag, index := MatchStrings(m, strings.Split(tc.desired, "|")...)
			if tag.String() != tc.tag || index != tc.index {
				t.Errorf("got %v, %d; want %v, %d", tag, index, tc.tag, tc.index)
			}
		})
	}
}
// TestAddLikelySubtags verifies that addLikelySubtags maximizes a tag by
// filling in the most likely language, script, and region subtags per the
// CLDR likely-subtags data, while preserving variants and extensions.
func TestAddLikelySubtags(t *testing.T) {
	tests := []struct{ in, out string }{
		{"aa", "aa-Latn-ET"},
		{"aa-Latn", "aa-Latn-ET"},
		{"aa-Arab", "aa-Arab-ET"},
		{"aa-Arab-ER", "aa-Arab-ER"},
		{"kk", "kk-Cyrl-KZ"},
		{"kk-CN", "kk-Arab-CN"},
		{"cmn", "cmn"},
		{"zh-AU", "zh-Hant-AU"},
		{"zh-VN", "zh-Hant-VN"},
		{"zh-SG", "zh-Hans-SG"},
		{"zh-Hant", "zh-Hant-TW"},
		{"zh-Hani", "zh-Hani-CN"},
		{"und-Hani", "zh-Hani-CN"},
		{"und", "en-Latn-US"},
		{"und-GB", "en-Latn-GB"},
		{"und-CW", "pap-Latn-CW"},
		{"und-YT", "fr-Latn-YT"},
		{"und-Arab", "ar-Arab-EG"},
		{"und-AM", "hy-Armn-AM"},
		{"und-TW", "zh-Hant-TW"},
		{"und-002", "en-Latn-NG"},
		{"und-Latn-002", "en-Latn-NG"},
		{"en-Latn-002", "en-Latn-NG"},
		{"en-002", "en-Latn-NG"},
		{"en-001", "en-Latn-US"},
		{"und-003", "en-Latn-US"},
		{"und-GB", "en-Latn-GB"},
		{"Latn-001", "en-Latn-US"},
		{"en-001", "en-Latn-US"},
		{"es-419", "es-Latn-419"},
		{"he-145", "he-Hebr-IL"},
		{"ky-145", "ky-Latn-TR"},
		{"kk", "kk-Cyrl-KZ"},
		// Don't specialize duplicate and ambiguous matches.
		{"kk-034", "kk-Arab-034"}, // Matches IR and AF. Both are Arab.
		{"ku-145", "ku-Latn-TR"},  // Matches IQ, TR, and LB, but kk -> TR.
		{"und-Arab-CC", "ms-Arab-CC"},
		{"und-Arab-GB", "ks-Arab-GB"},
		{"und-Hans-CC", "zh-Hans-CC"},
		{"und-CC", "en-Latn-CC"},
		{"sr", "sr-Cyrl-RS"},
		{"sr-151", "sr-Latn-151"}, // Matches RO and RU.
		// We would like addLikelySubtags to generate the same results if the input
		// only changes by adding tags that would otherwise have been added
		// by the expansion.
		// In other words:
		//     und-AA -> xx-Scrp-AA   implies und-Scrp-AA -> xx-Scrp-AA
		//     und-AA -> xx-Scrp-AA   implies xx-AA -> xx-Scrp-AA
		//     und-Scrp -> xx-Scrp-AA implies und-Scrp-AA -> xx-Scrp-AA
		//     und-Scrp -> xx-Scrp-AA implies xx-Scrp -> xx-Scrp-AA
		//     xx -> xx-Scrp-AA       implies xx-Scrp -> xx-Scrp-AA
		//     xx -> xx-Scrp-AA       implies xx-AA -> xx-Scrp-AA
		//
		// The algorithm specified in
		//   http://unicode.org/reports/tr35/tr35-9.html#Supplemental_Data,
		// Section C.10, does not handle the first case. For example,
		// the CLDR data contains an entry und-BJ -> fr-Latn-BJ, but
		// there is no rule for und-Latn-BJ. According to spec, und-Latn-BJ
		// would expand to en-Latn-BJ, violating the aforementioned principle.
		// We deviate from the spec by letting und-Scrp-AA expand to xx-Scrp-AA
		// if a rule of the form und-AA -> xx-Scrp-AA is defined.
		// Note that as of version 23, CLDR has some explicitly specified
		// entries that do not conform to these rules. The implementation
		// will not correct these explicit inconsistencies. A later version of
		// CLDR is supposed to fix this.
		{"und-Latn-BJ", "fr-Latn-BJ"},
		{"und-Bugi-ID", "bug-Bugi-ID"},
		// regions, scripts and languages without definitions
		{"und-Arab-AA", "ar-Arab-AA"},
		{"und-Afak-RE", "fr-Afak-RE"},
		{"und-Arab-GB", "ks-Arab-GB"},
		{"abp-Arab-GB", "abp-Arab-GB"},
		// script has preference over region
		{"und-Arab-NL", "ar-Arab-NL"},
		{"zza", "zza-Latn-TR"},
		// preserve variants and extensions
		{"de-1901", "de-Latn-DE-1901"},
		{"de-x-abc", "de-Latn-DE-x-abc"},
		{"de-1901-x-abc", "de-Latn-DE-1901-x-abc"},
		{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
	}
	for i, tt := range tests {
		in, _ := Parse(tt.in)
		out, _ := Parse(tt.out)
		in, _ = in.addLikelySubtags()
		if in.String() != out.String() {
			t.Errorf("%d: add(%s) was %s; want %s", i, tt.in, in, tt.out)
		}
	}
}
// TestMinimize verifies that minimize removes subtags that would be restored
// by likely-subtags expansion, and that maximizing the minimized tag yields
// the same result as maximizing the original (round-trip property).
func TestMinimize(t *testing.T) {
	tests := []struct{ in, out string }{
		{"aa", "aa"},
		{"aa-Latn", "aa"},
		{"aa-Latn-ET", "aa"},
		{"aa-ET", "aa"},
		{"aa-Arab", "aa-Arab"},
		{"aa-Arab-ER", "aa-Arab-ER"},
		{"aa-Arab-ET", "aa-Arab"},
		{"und", "und"},
		{"und-Latn", "und"},
		{"und-Latn-US", "und"},
		{"en-Latn-US", "en"},
		{"cmn", "cmn"},
		{"cmn-Hans", "cmn-Hans"},
		{"cmn-Hant", "cmn-Hant"},
		{"zh-AU", "zh-AU"},
		{"zh-VN", "zh-VN"},
		{"zh-SG", "zh-SG"},
		{"zh-Hant", "zh-Hant"},
		{"zh-Hant-TW", "zh-TW"},
		{"zh-Hans", "zh"},
		{"zh-Hani", "zh-Hani"},
		{"und-Hans", "und-Hans"},
		{"und-Hani", "und-Hani"},
		{"und-CW", "und-CW"},
		{"und-YT", "und-YT"},
		{"und-Arab", "und-Arab"},
		{"und-AM", "und-AM"},
		{"und-Arab-CC", "und-Arab-CC"},
		{"und-CC", "und-CC"},
		{"und-Latn-BJ", "und-BJ"},
		{"und-Bugi-ID", "und-Bugi"},
		{"bug-Bugi-ID", "bug-Bugi"},
		// regions, scripts and languages without definitions
		{"und-Arab-AA", "und-Arab-AA"},
		// preserve variants and extensions
		{"de-Latn-1901", "de-1901"},
		{"de-Latn-x-abc", "de-x-abc"},
		{"de-DE-1901-x-abc", "de-1901-x-abc"},
		{"x-abc", "x-abc"}, // TODO: is this the desired behavior?
	}
	for i, tt := range tests {
		in, _ := Parse(tt.in)
		out, _ := Parse(tt.out)
		min, _ := in.minimize()
		if min.String() != out.String() {
			t.Errorf("%d: min(%s) was %s; want %s", i, tt.in, min, tt.out)
		}
		// Round trip: maximizing the minimized tag must match maximizing
		// the original input.
		max, _ := min.addLikelySubtags()
		if x, _ := in.addLikelySubtags(); x.String() != max.String() {
			t.Errorf("%d: max(min(%s)) = %s; want %s", i, tt.in, max, x)
		}
	}
}
// TestRegionGroups verifies regionGroupDist, the distance between the
// regions of two tags that share the same script.
func TestRegionGroups(t *testing.T) {
	testCases := []struct {
		a, b     string
		distance uint8
	}{
		{"zh-TW", "zh-HK", 5},
		{"zh-MO", "zh-HK", 4},
		{"es-ES", "es-AR", 5},
		{"es-ES", "es", 4},
		{"es-419", "es-MX", 4},
		{"es-AR", "es-MX", 4},
		{"es-ES", "es-MX", 5},
		{"es-PT", "es-MX", 5},
	}
	for _, tc := range testCases {
		a := MustParse(tc.a)
		aScript, _ := a.Script()
		b := MustParse(tc.b)
		bScript, _ := b.Script()

		// The distance is only defined for tags with the same script.
		if aScript != bScript {
			t.Errorf("scripts differ: %q vs %q", aScript, bScript)
			continue
		}

		d, _ := regionGroupDist(a.region, b.region, aScript.scriptID, a.lang)
		if d != tc.distance {
			// Use %d: the distances are small numeric values; %q would
			// render them as quoted control characters.
			t.Errorf("%s vs %s: got %d; want %d", tc.a, tc.b, d, tc.distance)
		}
	}
}
// TestIsParadigmLocale checks which language/region pairs count as paradigm
// locales for matching purposes.
func TestIsParadigmLocale(t *testing.T) {
	testCases := map[string]bool{
		"en-US":  true,
		"en-GB":  true,
		"en-VI":  false,
		"es-GB":  false,
		"es-ES":  true,
		"es-419": true,
	}
	for in, want := range testCases {
		loc := Make(in)
		if got := isParadigmLocale(loc.lang, loc.region); got != want {
			t.Errorf("isPL(%q) = %v; want %v", in, got, want)
		}
	}
}
// Implementation of String methods for various types for debugging purposes.
func (m *matcher) String() string {
w := &bytes.Buffer{}
fmt.Fprintln(w, "Default:", m.default_)
for tag, h := range m.index {
fmt.Fprintf(w, " %s: %v\n", tag, h)
}
return w.String()
}
// String renders the header's list of have-tags for debugging purposes.
func (h *matchHeader) String() string {
	var buf bytes.Buffer
	fmt.Fprint(&buf, "haveTag: ")
	for _, ht := range h.haveTags {
		fmt.Fprintf(&buf, "%v, ", ht)
	}
	return buf.String()
}
// String renders a haveTag's fields (tag, index, confidence, maximized
// region/script, and alternative script) for debugging purposes.
func (t haveTag) String() string {
	return fmt.Sprintf("%v:%d:%v:%v-%v|%v", t.tag, t.index, t.conf, t.maxRegion, t.maxScript, t.altScript)
}
// TestBestMatchAlloc checks that Match does not allocate when called with a
// pre-built tag slice.
func TestBestMatchAlloc(t *testing.T) {
	m := NewMatcher(makeTagList("en sr nl"))
	// Go allocates when creating a list of tags from a single tag!
	// Build the slice up front so only Match itself is measured.
	list := []Tag{English}
	avg := testtext.AllocsPerRun(1, func() {
		m.Match(list...)
	})
	if avg > 0 {
		t.Errorf("got %f; want 0", avg)
	}
}
// benchHave is the list of supported tags used by the Benchmark* functions
// below; it mixes exact, legacy (iw), script-qualified, and region-qualified
// tags.
var benchHave = []Tag{
	mk("en"),
	mk("en-GB"),
	mk("za"),
	mk("zh-Hant"),
	mk("zh-Hans-CN"),
	mk("zh"),
	mk("zh-HK"),
	mk("ar-MK"),
	mk("en-CA"),
	mk("fr-CA"),
	mk("fr-US"),
	mk("fr-CH"),
	mk("fr"),
	mk("lt"),
	mk("lv"),
	mk("iw"),
	mk("iw-NL"),
	mk("he"),
	mk("he-IT"),
	mk("tlh"),
	mk("ja"),
	mk("ja-Jpan"),
	mk("ja-Jpan-JP"),
	mk("de"),
	mk("de-CH"),
	mk("de-AT"),
	mk("de-DE"),
	mk("sr"),
	mk("sr-Latn"),
	mk("sr-Cyrl"),
	mk("sr-ME"),
}
// benchWant lists sets of desired tags fed to the matcher by BenchmarkMatch.
var benchWant = [][]Tag{
	{
		mk("en"),
	},
	{
		mk("en-AU"),
		mk("de-HK"),
		mk("nl"),
		mk("fy"),
		mk("lv"),
	},
	{
		mk("en-AU"),
		mk("de-HK"),
		mk("nl"),
		mk("fy"),
	},
	{
		mk("ja-Hant"),
		mk("da-HK"),
		mk("nl"),
		mk("zh-TW"),
	},
	{
		mk("ja-Hant"),
		mk("da-HK"),
		mk("nl"),
		mk("hr"),
	},
}
func BenchmarkMatch(b *testing.B) {
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
for _, want := range benchWant {
m.getBest(want...)
}
}
}
func BenchmarkMatchExact(b *testing.B) {
want := mk("en")
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want)
}
}
func BenchmarkMatchAltLanguagePresent(b *testing.B) {
want := mk("hr")
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want)
}
}
func BenchmarkMatchAltLanguageNotPresent(b *testing.B) {
want := mk("nn")
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want)
}
}
func BenchmarkMatchAltScriptPresent(b *testing.B) {
want := mk("zh-Hant-CN")
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want)
}
}
func BenchmarkMatchAltScriptNotPresent(b *testing.B) {
want := mk("fr-Cyrl")
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want)
}
}
func BenchmarkMatchLimitedExact(b *testing.B) {
want := []Tag{mk("he-NL"), mk("iw-NL")}
m := newMatcher(benchHave, nil)
for i := 0; i < b.N; i++ {
m.getBest(want...)
}
}

View file

@ -1,517 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package language
import (
"bytes"
"strings"
"testing"
"golang.org/x/text/internal/tag"
)
// scanTest describes one scanner test case: the input string, the tokens
// the scanner should yield, and whether scanning succeeds.
type scanTest struct {
	ok  bool     // true if scanning does not result in an error
	in  string   // the input to scan
	tok []string // the expected tokens
}
// tests lists scanner inputs together with the tokens they should produce.
// Both '-' and '_' are accepted as separators; malformed separators and
// over-long or invalid subtags make the scan fail while still yielding the
// valid tokens.
var tests = []scanTest{
	{true, "", []string{}},
	{true, "1", []string{"1"}},
	{true, "en", []string{"en"}},
	{true, "root", []string{"root"}},
	{true, "maxchars", []string{"maxchars"}},
	{false, "bad/", []string{}},
	{false, "morethan8", []string{}},
	{false, "-", []string{}},
	{false, "----", []string{}},
	{false, "_", []string{}},
	{true, "en-US", []string{"en", "US"}},
	{true, "en_US", []string{"en", "US"}},
	{false, "en-US-", []string{"en", "US"}},
	{false, "en-US--", []string{"en", "US"}},
	{false, "en-US---", []string{"en", "US"}},
	{false, "en--US", []string{"en", "US"}},
	{false, "-en-US", []string{"en", "US"}},
	{false, "-en--US-", []string{"en", "US"}},
	{false, "-en--US-", []string{"en", "US"}},
	{false, "en-.-US", []string{"en", "US"}},
	{false, ".-en--US-.", []string{"en", "US"}},
	{false, "en-u.-US", []string{"en", "US"}},
	{true, "en-u1-US", []string{"en", "u1", "US"}},
	{true, "maxchar1_maxchar2-maxchar3", []string{"maxchar1", "maxchar2", "maxchar3"}},
	{false, "moreThan8-moreThan8-e", []string{"e"}},
}
// TestScan steps a scanner over each input in tests and compares the token
// stream, the normalized buffer, and the error state against expectations.
func TestScan(t *testing.T) {
	for i, tt := range tests {
		scan := makeScannerString(tt.in)
		// Walk the token stream until the scanner reports done.
		for j := 0; !scan.done; j++ {
			if j >= len(tt.tok) {
				t.Errorf("%d: extra token %q", i, scan.token)
			} else if tag.Compare(tt.tok[j], scan.token) != 0 {
				t.Errorf("%d: token %d: found %q; want %q", i, j, scan.token, tt.tok[j])
				break
			}
			scan.scan()
		}
		// The scanner's buffer, with '_' separators normalized to '-',
		// must equal the expected tokens joined by '-'.
		if s := strings.Join(tt.tok, "-"); tag.Compare(s, bytes.Replace(scan.b, b("_"), b("-"), -1)) != 0 {
			t.Errorf("%d: input: found %q; want %q", i, scan.b, s)
		}
		if (scan.err == nil) != tt.ok {
			t.Errorf("%d: ok: found %v; want %v", i, scan.err == nil, tt.ok)
		}
	}
}
// TestAcceptMinSize checks scanner.acceptMinSize: for each minimum token
// size it must return the byte length of the leading run of tokens of at
// least that size, separators included.
func TestAcceptMinSize(t *testing.T) {
	for i, tt := range tests {
		// count number of successive tokens with a minimum size.
		for sz := 1; sz <= 8; sz++ {
			scan := makeScannerString(tt.in)
			scan.end, scan.next = 0, 0
			got := scan.acceptMinSize(sz)

			// Compute the expected byte count from the token list.
			want := 0
			for j := 0; j < len(tt.tok) && len(tt.tok[j]) >= sz; j++ {
				want += len(tt.tok[j])
				if j > 0 {
					want++ // one separator byte between tokens
				}
			}
			if got != want {
				t.Errorf("%d:%d: found len %d; want %d", i, sz, got, want)
			}
		}
	}
}
// parseTest describes one parser test case, listing the expected value of
// each component of the parsed tag.
type parseTest struct {
	i                    int // the index of this test
	in                   string
	lang, script, region string // expected subtags ("" if absent)
	variants, ext        string // expected variants and extensions, '-'-joined
	extList              []string // only used when more than one extension is present
	invalid              bool
	rewrite              bool // special rewrite not handled by parseTag
	changed              bool // string needed to be reformatted
}
// parseTests returns the table of parser test cases shared by the parsing
// tests in this file. After building the table it fills in the derived
// fields: ext is the '-'-joined extList, and a lone ext is promoted to a
// one-element extList.
func parseTests() []parseTest {
	tests := []parseTest{
		{in: "root", lang: "und"},
		{in: "und", lang: "und"},
		{in: "en", lang: "en"},
		{in: "xy", lang: "und", invalid: true},
		{in: "en-ZY", lang: "en", invalid: true},
		{in: "gsw", lang: "gsw"},
		{in: "sr_Latn", lang: "sr", script: "Latn"},
		{in: "af-Arab", lang: "af", script: "Arab"},
		{in: "nl-BE", lang: "nl", region: "BE"},
		{in: "es-419", lang: "es", region: "419"},
		{in: "und-001", lang: "und", region: "001"},
		{in: "de-latn-be", lang: "de", script: "Latn", region: "BE"},
		// Variants
		{in: "de-1901", lang: "de", variants: "1901"},
		// Accept with unsuppressed script.
		{in: "de-Latn-1901", lang: "de", script: "Latn", variants: "1901"},
		// Specialized.
		{in: "sl-rozaj", lang: "sl", variants: "rozaj"},
		{in: "sl-rozaj-lipaw", lang: "sl", variants: "rozaj-lipaw"},
		{in: "sl-rozaj-biske", lang: "sl", variants: "rozaj-biske"},
		{in: "sl-rozaj-biske-1994", lang: "sl", variants: "rozaj-biske-1994"},
		{in: "sl-rozaj-1994", lang: "sl", variants: "rozaj-1994"},
		// Maximum number of variants while adhering to prefix rules.
		{in: "sl-rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp"},
		// Sorting.
		{in: "sl-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
		{in: "sl-rozaj-biske-1994-alalc97-fonupa-fonipa-fonxsamp", lang: "sl", variants: "rozaj-biske-1994-alalc97-fonipa-fonupa-fonxsamp", changed: true},
		{in: "nl-fonxsamp-alalc97-fonipa-fonupa", lang: "nl", variants: "alalc97-fonipa-fonupa-fonxsamp", changed: true},
		// Duplicates variants are removed, but not an error.
		{in: "nl-fonupa-fonupa", lang: "nl", variants: "fonupa"},
		// Variants that do not have correct prefixes. We still accept these.
		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
		{in: "sl-rozaj-lipaw-1994", lang: "sl", variants: "rozaj-lipaw-1994"},
		{in: "sl-1994-biske-rozaj-1994-biske-rozaj", lang: "sl", variants: "rozaj-biske-1994", changed: true},
		{in: "de-Cyrl-1901", lang: "de", script: "Cyrl", variants: "1901"},
		// Invalid variant.
		{in: "de-1902", lang: "de", variants: "", invalid: true},

		{in: "EN_CYRL", lang: "en", script: "Cyrl"},
		// private use and extensions
		{in: "x-a-b-c-d", ext: "x-a-b-c-d"},
		{in: "x_A.-B-C_D", ext: "x-b-c-d", invalid: true, changed: true},
		{in: "x-aa-bbbb-cccccccc-d", ext: "x-aa-bbbb-cccccccc-d"},
		{in: "en-c_cc-b-bbb-a-aaa", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc"}},
		{in: "en-x_cc-b-bbb-a-aaa", lang: "en", ext: "x-cc-b-bbb-a-aaa", changed: true},
		{in: "en-c_cc-b-bbb-a-aaa-x-x", lang: "en", changed: true, extList: []string{"a-aaa", "b-bbb", "c-cc", "x-x"}},
		{in: "en-v-c", lang: "en", ext: "", invalid: true},
		{in: "en-v-abcdefghi", lang: "en", ext: "", invalid: true},
		{in: "en-v-abc-x", lang: "en", ext: "v-abc", invalid: true},
		{in: "en-v-abc-x-", lang: "en", ext: "v-abc", invalid: true},
		{in: "en-v-abc-w-x-xx", lang: "en", extList: []string{"v-abc", "x-xx"}, invalid: true, changed: true},
		{in: "en-v-abc-w-y-yx", lang: "en", extList: []string{"v-abc", "y-yx"}, invalid: true, changed: true},
		{in: "en-v-c-abc", lang: "en", ext: "c-abc", invalid: true, changed: true},
		{in: "en-v-w-abc", lang: "en", ext: "w-abc", invalid: true, changed: true},
		{in: "en-v-x-abc", lang: "en", ext: "x-abc", invalid: true, changed: true},
		{in: "en-v-x-a", lang: "en", ext: "x-a", invalid: true, changed: true},
		{in: "en-9-aa-0-aa-z-bb-x-a", lang: "en", extList: []string{"0-aa", "9-aa", "z-bb", "x-a"}, changed: true},
		{in: "en-u-c", lang: "en", ext: "", invalid: true},
		{in: "en-u-co-phonebk", lang: "en", ext: "u-co-phonebk"},
		{in: "en-u-co-phonebk-ca", lang: "en", ext: "u-co-phonebk", invalid: true},
		{in: "en-u-nu-arabic-co-phonebk-ca", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
		{in: "en-u-nu-arabic-co-phonebk-ca-x", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
		{in: "en-u-nu-arabic-co-phonebk-ca-s", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
		{in: "en-u-nu-arabic-co-phonebk-ca-a12345678", lang: "en", ext: "u-co-phonebk-nu-arabic", invalid: true, changed: true},
		{in: "en-u-co-phonebook", lang: "en", ext: "", invalid: true},
		{in: "en-u-co-phonebook-cu-xau", lang: "en", ext: "u-cu-xau", invalid: true, changed: true},
		{in: "en-Cyrl-u-co-phonebk", lang: "en", script: "Cyrl", ext: "u-co-phonebk"},
		{in: "en-US-u-co-phonebk", lang: "en", region: "US", ext: "u-co-phonebk"},
		{in: "en-US-u-co-phonebk-cu-xau", lang: "en", region: "US", ext: "u-co-phonebk-cu-xau"},
		{in: "en-scotland-u-co-phonebk", lang: "en", variants: "scotland", ext: "u-co-phonebk"},
		{in: "en-u-cu-xua-co-phonebk", lang: "en", ext: "u-co-phonebk-cu-xua", changed: true},
		{in: "en-u-def-abc-cu-xua-co-phonebk", lang: "en", ext: "u-abc-def-co-phonebk-cu-xua", changed: true},
		{in: "en-u-def-abc", lang: "en", ext: "u-abc-def", changed: true},
		{in: "en-u-cu-xua-co-phonebk-a-cd", lang: "en", extList: []string{"a-cd", "u-co-phonebk-cu-xua"}, changed: true},
		// Invalid "u" extension. Drop invalid parts.
		{in: "en-u-cu-co-phonebk", lang: "en", extList: []string{"u-co-phonebk"}, invalid: true, changed: true},
		{in: "en-u-cu-xau-co", lang: "en", extList: []string{"u-cu-xau"}, invalid: true},
		// We allow duplicate keys as the LDML spec does not explicitly prohibit it.
		// TODO: Consider eliminating duplicates and returning an error.
		{in: "en-u-cu-xau-co-phonebk-cu-xau", lang: "en", ext: "u-co-phonebk-cu-xau-cu-xau", changed: true},
		{in: "en-t-en-Cyrl-NL-fonipa", lang: "en", ext: "t-en-cyrl-nl-fonipa", changed: true},
		{in: "en-t-en-Cyrl-NL-fonipa-t0-abc-def", lang: "en", ext: "t-en-cyrl-nl-fonipa-t0-abc-def", changed: true},
		{in: "en-t-t0-abcd", lang: "en", ext: "t-t0-abcd"},
		// Not necessary to have changed here.
		{in: "en-t-nl-abcd", lang: "en", ext: "t-nl", invalid: true},
		{in: "en-t-nl-latn", lang: "en", ext: "t-nl-latn"},
		{in: "en-t-t0-abcd-x-a", lang: "en", extList: []string{"t-t0-abcd", "x-a"}},
		// invalid
		{in: "", lang: "und", invalid: true},
		{in: "-", lang: "und", invalid: true},
		{in: "x", lang: "und", invalid: true},
		{in: "x-", lang: "und", invalid: true},
		{in: "x--", lang: "und", invalid: true},
		{in: "a-a-b-c-d", lang: "und", invalid: true},
		{in: "en-", lang: "en", invalid: true},
		{in: "enne-", lang: "und", invalid: true},
		{in: "en.", lang: "und", invalid: true},
		{in: "en.-latn", lang: "und", invalid: true},
		{in: "en.-en", lang: "en", invalid: true},
		{in: "x-a-tooManyChars-c-d", ext: "x-a-c-d", invalid: true, changed: true},
		{in: "a-tooManyChars-c-d", lang: "und", invalid: true},
		// TODO: check key-value validity
		// { in: "en-u-cu-xd", lang: "en", ext: "u-cu-xd", invalid: true },
		{in: "en-t-abcd", lang: "en", invalid: true},
		{in: "en-Latn-US-en", lang: "en", script: "Latn", region: "US", invalid: true},
		// rewrites (more tests in TestGrandfathered)
		{in: "zh-min-nan", lang: "nan"},
		{in: "zh-yue", lang: "yue"},
		{in: "zh-xiang", lang: "hsn", rewrite: true},
		{in: "zh-guoyu", lang: "cmn", rewrite: true},
		{in: "iw", lang: "iw"},
		{in: "sgn-BE-FR", lang: "sfb", rewrite: true},
		{in: "i-klingon", lang: "tlh", rewrite: true},
	}
	// Fill in the derived ext/extList fields so both are always populated.
	for i, tt := range tests {
		tests[i].i = i
		if tt.extList != nil {
			tests[i].ext = strings.Join(tt.extList, "-")
		}
		if tt.ext != "" && tt.extList == nil {
			tests[i].extList = []string{tt.ext}
		}
	}
	return tests
}
// TestParseExtensions checks parseExtensions in isolation: it positions the
// scanner at the start of the extension part of each input and compares the
// normalized extension string against the expectation.
func TestParseExtensions(t *testing.T) {
	for i, tt := range parseTests() {
		if tt.ext == "" || tt.rewrite {
			continue
		}
		scan := makeScannerString(tt.in)
		// Skip the main tag part unless the input starts with a single-
		// character (private-use) subtag.
		if len(scan.b) > 1 && scan.b[1] != '-' {
			scan.end = nextExtension(string(scan.b), 0)
			scan.next = scan.end + 1
			scan.scan()
		}
		start := scan.start
		scan.toLower(start, len(scan.b))
		parseExtensions(&scan)
		ext := string(scan.b[start:])
		if ext != tt.ext {
			t.Errorf("%d(%s): ext was %v; want %v", i, tt.in, ext, tt.ext)
		}
		if changed := !strings.HasPrefix(tt.in[start:], ext); changed != tt.changed {
			t.Errorf("%d(%s): changed was %v; want %v", i, tt.in, changed, tt.changed)
		}
	}
}
// partChecks runs checks for each part by calling the function returned by f.
// f produces the Tag to check for a given test case (or skip=true to ignore
// it); the language, script, region, variant, and extension parts of the tag
// are then compared against the expectations in the table.
func partChecks(t *testing.T, f func(*parseTest) (Tag, bool)) {
	for i, tt := range parseTests() {
		tag, skip := f(&tt)
		if skip {
			continue
		}
		if l, _ := getLangID(b(tt.lang)); l != tag.lang {
			t.Errorf("%d: lang was %q; want %q", i, tag.lang, l)
		}
		if sc, _ := getScriptID(script, b(tt.script)); sc != tag.script {
			t.Errorf("%d: script was %q; want %q", i, tag.script, sc)
		}
		if r, _ := getRegionID(b(tt.region)); r != tag.region {
			t.Errorf("%d: region was %q; want %q", i, tag.region, r)
		}
		if tag.str == "" {
			continue
		}
		// pVariant/pExt are byte offsets into tag.str; step over the '-'
		// separator where one is present.
		p := int(tag.pVariant)
		if p < int(tag.pExt) {
			p++
		}
		if s, g := tag.str[p:tag.pExt], tt.variants; s != g {
			t.Errorf("%d: variants was %q; want %q", i, s, g)
		}
		p = int(tag.pExt)
		if p > 0 && p < len(tag.str) {
			p++
		}
		if s, g := (tag.str)[p:], tt.ext; s != g {
			t.Errorf("%d: extensions were %q; want %q", i, s, g)
		}
	}
}
// TestParseTag runs the shared part checks against parseTag, which only
// handles the language-script-region core, so private-use-only and rewrite
// cases are skipped and extension expectations are cleared.
func TestParseTag(t *testing.T) {
	partChecks(t, func(tt *parseTest) (Tag, bool) {
		if tt.rewrite || strings.HasPrefix(tt.in, "x-") {
			return Tag{}, true
		}
		scan := makeScannerString(tt.in)
		tag, end := parseTag(&scan)
		tag.str = string(scan.b[:end])
		tt.ext = ""
		tt.extList = []string{}
		return tag, false
	})
}
func TestParse(t *testing.T) {
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
id, err := Raw.Parse(tt.in)
ext := ""
if id.str != "" {
if strings.HasPrefix(id.str, "x-") {
ext = id.str
} else if int(id.pExt) < len(id.str) && id.pExt > 0 {
ext = id.str[id.pExt+1:]
}
}
if tag, _ := Raw.Parse(id.String()); tag.String() != id.String() {
t.Errorf("%d:%s: reparse was %q; want %q", tt.i, tt.in, id.String(), tag.String())
}
if ext != tt.ext {
t.Errorf("%d:%s: ext was %q; want %q", tt.i, tt.in, ext, tt.ext)
}
changed := id.str != "" && !strings.HasPrefix(tt.in, id.str)
if changed != tt.changed {
t.Errorf("%d:%s: changed was %v; want %v", tt.i, tt.in, changed, tt.changed)
}
if (err != nil) != tt.invalid {
t.Errorf("%d:%s: invalid was %v; want %v. Error: %v", tt.i, tt.in, err != nil, tt.invalid, err)
}
return id, false
})
}
func TestErrors(t *testing.T) {
mkInvalid := func(s string) error {
return mkErrInvalid([]byte(s))
}
tests := []struct {
in string
out error
}{
// invalid subtags.
{"ac", mkInvalid("ac")},
{"AC", mkInvalid("ac")},
{"aa-Uuuu", mkInvalid("Uuuu")},
{"aa-AB", mkInvalid("AB")},
// ill-formed wins over invalid.
{"ac-u", errSyntax},
{"ac-u-ca", errSyntax},
{"ac-u-ca-co-pinyin", errSyntax},
{"noob", errSyntax},
}
for _, tt := range tests {
_, err := Parse(tt.in)
if err != tt.out {
t.Errorf("%s: was %q; want %q", tt.in, err, tt.out)
}
}
}
func TestCompose1(t *testing.T) {
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
l, _ := ParseBase(tt.lang)
s, _ := ParseScript(tt.script)
r, _ := ParseRegion(tt.region)
v := []Variant{}
for _, x := range strings.Split(tt.variants, "-") {
p, _ := ParseVariant(x)
v = append(v, p)
}
e := []Extension{}
for _, x := range tt.extList {
p, _ := ParseExtension(x)
e = append(e, p)
}
id, _ = Raw.Compose(l, s, r, v, e)
return id, false
})
}
func TestCompose2(t *testing.T) {
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
l, _ := ParseBase(tt.lang)
s, _ := ParseScript(tt.script)
r, _ := ParseRegion(tt.region)
p := []interface{}{l, s, r, s, r, l}
for _, x := range strings.Split(tt.variants, "-") {
v, _ := ParseVariant(x)
p = append(p, v)
}
for _, x := range tt.extList {
e, _ := ParseExtension(x)
p = append(p, e)
}
id, _ = Raw.Compose(p...)
return id, false
})
}
func TestCompose3(t *testing.T) {
partChecks(t, func(tt *parseTest) (id Tag, skip bool) {
id, _ = Raw.Parse(tt.in)
id, _ = Raw.Compose(id)
return id, false
})
}
func mk(s string) Tag {
return Raw.Make(s)
}
func TestParseAcceptLanguage(t *testing.T) {
type res struct {
t Tag
q float32
}
en := []res{{mk("en"), 1.0}}
tests := []struct {
out []res
in string
ok bool
}{
{en, "en", true},
{en, " en", true},
{en, "en ", true},
{en, " en ", true},
{en, "en,", true},
{en, ",en", true},
{en, ",,,en,,,", true},
{en, ",en;q=1", true},
// We allow an empty input, contrary to spec.
{nil, "", true},
{[]res{{mk("aa"), 1}}, "aa;", true}, // allow unspecified weight
// errors
{nil, ";", false},
{nil, "$", false},
{nil, "e;", false},
{nil, "x;", false},
{nil, "x", false},
{nil, "ac", false}, // non-existing language
{nil, "aa;q", false},
{nil, "aa;q=", false},
{nil, "aa;q=.", false},
// odd fallbacks
{
[]res{{mk("en"), 0.1}},
" english ;q=.1",
true,
},
{
[]res{{mk("it"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}},
" italian, deutsch, french",
true,
},
// lists
{
[]res{{mk("en"), 0.1}},
"en;q=.1",
true,
},
{
[]res{{mk("mul"), 1.0}},
"*",
true,
},
{
[]res{{mk("en"), 1.0}, {mk("de"), 1.0}},
"en,de",
true,
},
{
[]res{{mk("en"), 1.0}, {mk("de"), .5}},
"en,de;q=0.5",
true,
},
{
[]res{{mk("de"), 0.8}, {mk("en"), 0.5}},
" en ; q = 0.5 , , de;q=0.8",
true,
},
{
[]res{{mk("en"), 1.0}, {mk("de"), 1.0}, {mk("fr"), 1.0}, {mk("tlh"), 1.0}},
"en,de,fr,i-klingon",
true,
},
// sorting
{
[]res{{mk("tlh"), 0.4}, {mk("de"), 0.2}, {mk("fr"), 0.2}, {mk("en"), 0.1}},
"en;q=0.1,de;q=0.2,fr;q=0.2,i-klingon;q=0.4",
true,
},
// dropping
{
[]res{{mk("fr"), 0.2}, {mk("en"), 0.1}},
"en;q=0.1,de;q=0,fr;q=0.2,i-klingon;q=0.0",
true,
},
}
for i, tt := range tests {
tags, qs, e := ParseAcceptLanguage(tt.in)
if e == nil != tt.ok {
t.Errorf("%d:%s:err: was %v; want %v", i, tt.in, e == nil, tt.ok)
}
for j, tag := range tags {
if out := tt.out[j]; !tag.equalTags(out.t) || qs[j] != out.q {
t.Errorf("%d:%s: was %s, %1f; want %s, %1f", i, tt.in, tag, qs[j], out.t, out.q)
break
}
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -1,282 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runes
import (
"strings"
"testing"
"unicode"
"golang.org/x/text/cases"
"golang.org/x/text/language"
"golang.org/x/text/transform"
)
var (
toUpper = cases.Upper(language.Und)
toLower = cases.Lower(language.Und)
)
type spanformer interface {
transform.SpanningTransformer
}
func TestPredicate(t *testing.T) {
testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
return If(Predicate(func(r rune) bool {
return unicode.Is(rt, r)
}), t, f)
})
}
func TestIn(t *testing.T) {
testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
return If(In(rt), t, f)
})
}
func TestNotIn(t *testing.T) {
testConditional(t, func(rt *unicode.RangeTable, t, f spanformer) spanformer {
return If(NotIn(rt), f, t)
})
}
func testConditional(t *testing.T, f func(rt *unicode.RangeTable, t, f spanformer) spanformer) {
lower := f(unicode.Latin, toLower, toLower)
for i, tt := range []transformTest{{
desc: "empty",
szDst: large,
atEOF: true,
in: "",
out: "",
outFull: "",
t: lower,
}, {
desc: "small",
szDst: 1,
atEOF: true,
in: "B",
out: "b",
outFull: "b",
errSpan: transform.ErrEndOfSpan,
t: lower,
}, {
desc: "short dst",
szDst: 2,
atEOF: true,
in: "AAA",
out: "aa",
outFull: "aaa",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: lower,
}, {
desc: "short dst writing error",
szDst: 1,
atEOF: false,
in: "A\x80",
out: "a",
outFull: "a\x80",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: lower,
}, {
desc: "short dst writing incomplete rune",
szDst: 2,
atEOF: true,
in: "Σ\xc2",
out: "Σ",
outFull: "Σ\xc2",
err: transform.ErrShortDst,
t: f(unicode.Latin, toLower, nil),
}, {
desc: "short dst, longer",
szDst: 5,
atEOF: true,
in: "Hellø",
out: "Hell",
outFull: "Hellø",
err: transform.ErrShortDst,
// idem is used to test short buffers by forcing processing of full-rune increments.
t: f(unicode.Latin, Map(idem), nil),
}, {
desc: "short dst, longer, writing error",
szDst: 6,
atEOF: false,
in: "\x80Hello\x80",
out: "\x80Hello",
outFull: "\x80Hello\x80",
err: transform.ErrShortDst,
t: f(unicode.Latin, Map(idem), nil),
}, {
desc: "short src",
szDst: 2,
atEOF: false,
in: "A\xc2",
out: "a",
outFull: "a\xc2",
err: transform.ErrShortSrc,
errSpan: transform.ErrEndOfSpan,
t: lower,
}, {
desc: "short src no change",
szDst: 2,
atEOF: false,
in: "a\xc2",
out: "a",
outFull: "a\xc2",
err: transform.ErrShortSrc,
errSpan: transform.ErrShortSrc,
nSpan: 1,
t: lower,
}, {
desc: "invalid input, atEOF",
szDst: large,
atEOF: true,
in: "\x80",
out: "\x80",
outFull: "\x80",
t: lower,
}, {
desc: "invalid input, !atEOF",
szDst: large,
atEOF: false,
in: "\x80",
out: "\x80",
outFull: "\x80",
t: lower,
}, {
desc: "invalid input, incomplete rune atEOF",
szDst: large,
atEOF: true,
in: "\xc2",
out: "\xc2",
outFull: "\xc2",
t: lower,
}, {
desc: "nop",
szDst: large,
atEOF: true,
in: "Hello World!",
out: "Hello World!",
outFull: "Hello World!",
t: f(unicode.Latin, nil, nil),
}, {
desc: "nop in",
szDst: large,
atEOF: true,
in: "THIS IS α ΤΕΣΤ",
out: "this is α ΤΕΣΤ",
outFull: "this is α ΤΕΣΤ",
errSpan: transform.ErrEndOfSpan,
t: f(unicode.Greek, nil, toLower),
}, {
desc: "nop in latin",
szDst: large,
atEOF: true,
in: "THIS IS α ΤΕΣΤ",
out: "THIS IS α τεστ",
outFull: "THIS IS α τεστ",
errSpan: transform.ErrEndOfSpan,
t: f(unicode.Latin, nil, toLower),
}, {
desc: "nop not in",
szDst: large,
atEOF: true,
in: "THIS IS α ΤΕΣΤ",
out: "this is α ΤΕΣΤ",
outFull: "this is α ΤΕΣΤ",
errSpan: transform.ErrEndOfSpan,
t: f(unicode.Latin, toLower, nil),
}, {
desc: "pass atEOF is true when at end",
szDst: large,
atEOF: true,
in: "hello",
out: "HELLO",
outFull: "HELLO",
errSpan: transform.ErrEndOfSpan,
t: f(unicode.Latin, upperAtEOF{}, nil),
}, {
desc: "pass atEOF is true when at end of segment",
szDst: large,
atEOF: true,
in: "hello ",
out: "HELLO ",
outFull: "HELLO ",
errSpan: transform.ErrEndOfSpan,
t: f(unicode.Latin, upperAtEOF{}, nil),
}, {
desc: "don't pass atEOF is true when atEOF is false",
szDst: large,
atEOF: false,
in: "hello",
out: "",
outFull: "HELLO",
err: transform.ErrShortSrc,
errSpan: transform.ErrShortSrc,
t: f(unicode.Latin, upperAtEOF{}, nil),
}, {
desc: "pass atEOF is true when at end, no change",
szDst: large,
atEOF: true,
in: "HELLO",
out: "HELLO",
outFull: "HELLO",
t: f(unicode.Latin, upperAtEOF{}, nil),
}, {
desc: "pass atEOF is true when at end of segment, no change",
szDst: large,
atEOF: true,
in: "HELLO ",
out: "HELLO ",
outFull: "HELLO ",
t: f(unicode.Latin, upperAtEOF{}, nil),
}, {
desc: "large input ASCII",
szDst: 12000,
atEOF: false,
in: strings.Repeat("HELLO", 2000),
out: strings.Repeat("hello", 2000),
outFull: strings.Repeat("hello", 2000),
errSpan: transform.ErrEndOfSpan,
err: nil,
t: lower,
}, {
desc: "large input non-ASCII",
szDst: 12000,
atEOF: false,
in: strings.Repeat("\u3333", 2000),
out: strings.Repeat("\u3333", 2000),
outFull: strings.Repeat("\u3333", 2000),
err: nil,
t: lower,
}} {
tt.check(t, i)
}
}
// upperAtEOF is a strange Transformer that converts text to uppercase, but only
// if atEOF is true.
type upperAtEOF struct{ transform.NopResetter }
func (upperAtEOF) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
if !atEOF {
return 0, 0, transform.ErrShortSrc
}
return toUpper.Transform(dst, src, atEOF)
}
func (upperAtEOF) Span(src []byte, atEOF bool) (n int, err error) {
if !atEOF {
return 0, transform.ErrShortSrc
}
return toUpper.Span(src, atEOF)
}
func BenchmarkConditional(b *testing.B) {
doBench(b, If(In(unicode.Hangul), transform.Nop, transform.Nop))
}

View file

@ -1,60 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runes_test
import (
"fmt"
"unicode"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/width"
)
func ExampleRemove() {
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
s, _, _ := transform.String(t, "résumé")
fmt.Println(s)
// Output:
// resume
}
func ExampleMap() {
replaceHyphens := runes.Map(func(r rune) rune {
if unicode.Is(unicode.Hyphen, r) {
return '|'
}
return r
})
s, _, _ := transform.String(replaceHyphens, "a-bc⸗d﹣e")
fmt.Println(s)
// Output:
// a|b|c|d|e
}
func ExampleIn() {
// Convert Latin characters to their canonical form, while keeping other
// width distinctions.
t := runes.If(runes.In(unicode.Latin), width.Fold, nil)
s, _, _ := transform.String(t, "アルアノリウ tech / アルアノリウ ")
fmt.Println(s)
// Output:
// アルアノリウ tech / アルアノリウ tech
}
func ExampleIf() {
// Widen everything but ASCII.
isASCII := func(r rune) bool { return r <= unicode.MaxASCII }
t := runes.If(runes.Predicate(isASCII), nil, width.Widen)
s, _, _ := transform.String(t, "アルアノリウ tech / 中國 / 5₩")
fmt.Println(s)
// Output:
// アルアノリウ tech / 中國 / 5₩
}

View file

@ -1,664 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package runes
import (
"strings"
"testing"
"unicode/utf8"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/transform"
)
type transformTest struct {
desc string
szDst int
atEOF bool
repl string
in string
out string // result string of first call to Transform
outFull string // transform of entire input string
err error
errSpan error
nSpan int
t transform.SpanningTransformer
}
const large = 10240
func (tt *transformTest) check(t *testing.T, i int) {
if tt.t == nil {
return
}
dst := make([]byte, tt.szDst)
src := []byte(tt.in)
nDst, nSrc, err := tt.t.Transform(dst, src, tt.atEOF)
if err != tt.err {
t.Errorf("%d:%s:error: got %v; want %v", i, tt.desc, err, tt.err)
}
if got := string(dst[:nDst]); got != tt.out {
t.Errorf("%d:%s:out: got %q; want %q", i, tt.desc, got, tt.out)
}
// Calls tt.t.Transform for the remainder of the input. We use this to test
// the nSrc return value.
out := make([]byte, large)
n := copy(out, dst[:nDst])
nDst, _, _ = tt.t.Transform(out[n:], src[nSrc:], true)
if got, want := string(out[:n+nDst]), tt.outFull; got != want {
t.Errorf("%d:%s:outFull: got %q; want %q", i, tt.desc, got, want)
}
tt.t.Reset()
p := 0
for ; p < len(tt.in) && p < len(tt.outFull) && tt.in[p] == tt.outFull[p]; p++ {
}
if tt.nSpan != 0 {
p = tt.nSpan
}
if n, err = tt.t.Span([]byte(tt.in), tt.atEOF); n != p || err != tt.errSpan {
t.Errorf("%d:%s:span: got %d, %v; want %d, %v", i, tt.desc, n, err, p, tt.errSpan)
}
}
func idem(r rune) rune { return r }
func TestMap(t *testing.T) {
runes := []rune{'a', 'ç', '中', '\U00012345', 'a'}
// Default mapper used for this test.
rotate := Map(func(r rune) rune {
for i, m := range runes {
if m == r {
return runes[i+1]
}
}
return r
})
for i, tt := range []transformTest{{
desc: "empty",
szDst: large,
atEOF: true,
in: "",
out: "",
outFull: "",
t: rotate,
}, {
desc: "no change",
szDst: 1,
atEOF: true,
in: "b",
out: "b",
outFull: "b",
t: rotate,
}, {
desc: "short dst",
szDst: 2,
atEOF: true,
in: "aaaa",
out: "ç",
outFull: "çççç",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "short dst ascii, no change",
szDst: 2,
atEOF: true,
in: "bbb",
out: "bb",
outFull: "bbb",
err: transform.ErrShortDst,
t: rotate,
}, {
desc: "short dst writing error",
szDst: 2,
atEOF: false,
in: "a\x80",
out: "ç",
outFull: "ç\ufffd",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "short dst writing incomplete rune",
szDst: 2,
atEOF: true,
in: "a\xc0",
out: "ç",
outFull: "ç\ufffd",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "short dst, longer",
szDst: 5,
atEOF: true,
in: "Hellø",
out: "Hell",
outFull: "Hellø",
err: transform.ErrShortDst,
t: rotate,
}, {
desc: "short dst, single",
szDst: 1,
atEOF: false,
in: "ø",
out: "",
outFull: "ø",
err: transform.ErrShortDst,
t: Map(idem),
}, {
desc: "short dst, longer, writing error",
szDst: 8,
atEOF: false,
in: "\x80Hello\x80",
out: "\ufffdHello",
outFull: "\ufffdHello\ufffd",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "short src",
szDst: 2,
atEOF: false,
in: "a\xc2",
out: "ç",
outFull: "ç\ufffd",
err: transform.ErrShortSrc,
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "invalid input, atEOF",
szDst: large,
atEOF: true,
in: "\x80",
out: "\ufffd",
outFull: "\ufffd",
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "invalid input, !atEOF",
szDst: large,
atEOF: false,
in: "\x80",
out: "\ufffd",
outFull: "\ufffd",
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "incomplete rune !atEOF",
szDst: large,
atEOF: false,
in: "\xc2",
out: "",
outFull: "\ufffd",
err: transform.ErrShortSrc,
errSpan: transform.ErrShortSrc,
t: rotate,
}, {
desc: "invalid input, incomplete rune atEOF",
szDst: large,
atEOF: true,
in: "\xc2",
out: "\ufffd",
outFull: "\ufffd",
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "misc correct",
szDst: large,
atEOF: true,
in: "a\U00012345 ç!",
out: "ça 中!",
outFull: "ça 中!",
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "misc correct and invalid",
szDst: large,
atEOF: true,
in: "Hello\x80 w\x80orl\xc0d!\xc0",
out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "misc correct and invalid, short src",
szDst: large,
atEOF: false,
in: "Hello\x80 w\x80orl\xc0d!\xc2",
out: "Hello\ufffd w\ufffdorl\ufffdd!",
outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
err: transform.ErrShortSrc,
errSpan: transform.ErrEndOfSpan,
t: rotate,
}, {
desc: "misc correct and invalid, short src, replacing RuneError",
szDst: large,
atEOF: false,
in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
out: "Hel?lo? w?orl?d!",
outFull: "Hel?lo? w?orl?d!?",
errSpan: transform.ErrEndOfSpan,
err: transform.ErrShortSrc,
t: Map(func(r rune) rune {
if r == utf8.RuneError {
return '?'
}
return r
}),
}} {
tt.check(t, i)
}
}
func TestRemove(t *testing.T) {
remove := Remove(Predicate(func(r rune) bool {
return strings.ContainsRune("aeiou\u0300\uFF24\U00012345", r)
}))
for i, tt := range []transformTest{
0: {
szDst: large,
atEOF: true,
in: "",
out: "",
outFull: "",
t: remove,
},
1: {
szDst: 0,
atEOF: true,
in: "aaaa",
out: "",
outFull: "",
errSpan: transform.ErrEndOfSpan,
t: remove,
},
2: {
szDst: 1,
atEOF: true,
in: "aaaa",
out: "",
outFull: "",
errSpan: transform.ErrEndOfSpan,
t: remove,
},
3: {
szDst: 1,
atEOF: true,
in: "baaaa",
out: "b",
outFull: "b",
errSpan: transform.ErrEndOfSpan,
t: remove,
},
4: {
szDst: 2,
atEOF: true,
in: "açaaa",
out: "ç",
outFull: "ç",
errSpan: transform.ErrEndOfSpan,
t: remove,
},
5: {
szDst: 2,
atEOF: true,
in: "aaaç",
out: "ç",
outFull: "ç",
errSpan: transform.ErrEndOfSpan,
t: remove,
},
6: {
szDst: 2,
atEOF: false,
in: "a\x80",
out: "",
outFull: "\ufffd",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: remove,
},
7: {
szDst: 1,
atEOF: true,
in: "a\xc0",
out: "",
outFull: "\ufffd",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: remove,
},
8: {
szDst: 1,
atEOF: false,
in: "a\xc2",
out: "",
outFull: "\ufffd",
err: transform.ErrShortSrc,
errSpan: transform.ErrEndOfSpan,
t: remove,
},
9: {
szDst: large,
atEOF: true,
in: "\x80",
out: "\ufffd",
outFull: "\ufffd",
errSpan: transform.ErrEndOfSpan,
t: remove,
},
10: {
szDst: large,
atEOF: false,
in: "\x80",
out: "\ufffd",
outFull: "\ufffd",
errSpan: transform.ErrEndOfSpan,
t: remove,
},
11: {
szDst: large,
atEOF: true,
in: "\xc2",
out: "\ufffd",
outFull: "\ufffd",
errSpan: transform.ErrEndOfSpan,
t: remove,
},
12: {
szDst: large,
atEOF: false,
in: "\xc2",
out: "",
outFull: "\ufffd",
err: transform.ErrShortSrc,
errSpan: transform.ErrShortSrc,
t: remove,
},
13: {
szDst: large,
atEOF: true,
in: "Hello \U00012345world!",
out: "Hll wrld!",
outFull: "Hll wrld!",
errSpan: transform.ErrEndOfSpan,
t: remove,
},
14: {
szDst: large,
atEOF: true,
in: "Hello\x80 w\x80orl\xc0d!\xc0",
out: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
errSpan: transform.ErrEndOfSpan,
t: remove,
},
15: {
szDst: large,
atEOF: false,
in: "Hello\x80 w\x80orl\xc0d!\xc2",
out: "Hll\ufffd w\ufffdrl\ufffdd!",
outFull: "Hll\ufffd w\ufffdrl\ufffdd!\ufffd",
err: transform.ErrShortSrc,
errSpan: transform.ErrEndOfSpan,
t: remove,
},
16: {
szDst: large,
atEOF: false,
in: "Hel\ufffdlo\x80 w\x80orl\xc0d!\xc2",
out: "Hello world!",
outFull: "Hello world!",
err: transform.ErrShortSrc,
errSpan: transform.ErrEndOfSpan,
t: Remove(Predicate(func(r rune) bool { return r == utf8.RuneError })),
},
17: {
szDst: 4,
atEOF: true,
in: "Hellø",
out: "Hll",
outFull: "Hllø",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: remove,
},
18: {
szDst: 4,
atEOF: false,
in: "Hellø",
out: "Hll",
outFull: "Hllø",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: remove,
},
19: {
szDst: 8,
atEOF: false,
in: "\x80Hello\uFF24\x80",
out: "\ufffdHll",
outFull: "\ufffdHll\ufffd",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: remove,
},
20: {
szDst: 8,
atEOF: false,
in: "Hllll",
out: "Hllll",
outFull: "Hllll",
t: remove,
}} {
tt.check(t, i)
}
}
func TestReplaceIllFormed(t *testing.T) {
replace := ReplaceIllFormed()
for i, tt := range []transformTest{
0: {
szDst: large,
atEOF: true,
in: "",
out: "",
outFull: "",
t: replace,
},
1: {
szDst: 1,
atEOF: true,
in: "aa",
out: "a",
outFull: "aa",
err: transform.ErrShortDst,
t: replace,
},
2: {
szDst: 1,
atEOF: true,
in: "a\x80",
out: "a",
outFull: "a\ufffd",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: replace,
},
3: {
szDst: 1,
atEOF: true,
in: "a\xc2",
out: "a",
outFull: "a\ufffd",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: replace,
},
4: {
szDst: large,
atEOF: true,
in: "\x80",
out: "\ufffd",
outFull: "\ufffd",
errSpan: transform.ErrEndOfSpan,
t: replace,
},
5: {
szDst: large,
atEOF: false,
in: "\x80",
out: "\ufffd",
outFull: "\ufffd",
errSpan: transform.ErrEndOfSpan,
t: replace,
},
6: {
szDst: large,
atEOF: true,
in: "\xc2",
out: "\ufffd",
outFull: "\ufffd",
errSpan: transform.ErrEndOfSpan,
t: replace,
},
7: {
szDst: large,
atEOF: false,
in: "\xc2",
out: "",
outFull: "\ufffd",
err: transform.ErrShortSrc,
errSpan: transform.ErrShortSrc,
t: replace,
},
8: {
szDst: large,
atEOF: true,
in: "Hello world!",
out: "Hello world!",
outFull: "Hello world!",
t: replace,
},
9: {
szDst: large,
atEOF: true,
in: "Hello\x80 w\x80orl\xc2d!\xc2",
out: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
errSpan: transform.ErrEndOfSpan,
t: replace,
},
10: {
szDst: large,
atEOF: false,
in: "Hello\x80 w\x80orl\xc2d!\xc2",
out: "Hello\ufffd w\ufffdorl\ufffdd!",
outFull: "Hello\ufffd w\ufffdorl\ufffdd!\ufffd",
err: transform.ErrShortSrc,
errSpan: transform.ErrEndOfSpan,
t: replace,
},
16: {
szDst: 10,
atEOF: false,
in: "\x80Hello\x80",
out: "\ufffdHello",
outFull: "\ufffdHello\ufffd",
err: transform.ErrShortDst,
errSpan: transform.ErrEndOfSpan,
t: replace,
},
17: {
szDst: 10,
atEOF: false,
in: "\ufffdHello\ufffd",
out: "\ufffdHello",
outFull: "\ufffdHello\ufffd",
err: transform.ErrShortDst,
t: replace,
},
} {
tt.check(t, i)
}
}
func TestMapAlloc(t *testing.T) {
if n := testtext.AllocsPerRun(3, func() {
Map(idem).Transform(nil, nil, false)
}); n > 0 {
t.Errorf("got %f; want 0", n)
}
}
func rmNop(r rune) bool { return false }
func TestRemoveAlloc(t *testing.T) {
if n := testtext.AllocsPerRun(3, func() {
Remove(Predicate(rmNop)).Transform(nil, nil, false)
}); n > 0 {
t.Errorf("got %f; want 0", n)
}
}
func TestReplaceIllFormedAlloc(t *testing.T) {
if n := testtext.AllocsPerRun(3, func() {
ReplaceIllFormed().Transform(nil, nil, false)
}); n > 0 {
t.Errorf("got %f; want 0", n)
}
}
func doBench(b *testing.B, t Transformer) {
for _, bc := range []struct{ name, data string }{
{"ascii", testtext.ASCII},
{"3byte", testtext.ThreeByteUTF8},
} {
dst := make([]byte, 2*len(bc.data))
src := []byte(bc.data)
testtext.Bench(b, bc.name+"/transform", func(b *testing.B) {
b.SetBytes(int64(len(src)))
for i := 0; i < b.N; i++ {
t.Transform(dst, src, true)
}
})
src = t.Bytes(src)
t.Reset()
testtext.Bench(b, bc.name+"/span", func(b *testing.B) {
b.SetBytes(int64(len(src)))
for i := 0; i < b.N; i++ {
t.Span(src, true)
}
})
}
}
func BenchmarkRemove(b *testing.B) {
doBench(b, Remove(Predicate(func(r rune) bool { return r == 'e' })))
}
func BenchmarkMapAll(b *testing.B) {
doBench(b, Map(func(r rune) rune { return 'a' }))
}
func BenchmarkMapNone(b *testing.B) {
doBench(b, Map(func(r rune) rune { return r }))
}
func BenchmarkReplaceIllFormed(b *testing.B) {
doBench(b, ReplaceIllFormed())
}
var (
input = strings.Repeat("Thé qüick brøwn føx jumps øver the lazy døg. ", 100)
)

View file

@ -1,54 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidirule
import (
"testing"
"golang.org/x/text/internal/testtext"
)
var benchData = []struct{ name, data string }{
{"ascii", "Scheveningen"},
{"arabic", "دبي"},
{"hangul", "다음과"},
}
func doBench(b *testing.B, fn func(b *testing.B, data string)) {
for _, d := range benchData {
testtext.Bench(b, d.name, func(b *testing.B) { fn(b, d.data) })
}
}
func BenchmarkSpan(b *testing.B) {
r := New()
doBench(b, func(b *testing.B, str string) {
b.SetBytes(int64(len(str)))
data := []byte(str)
for i := 0; i < b.N; i++ {
r.Reset()
r.Span(data, true)
}
})
}
func BenchmarkDirectionASCII(b *testing.B) {
doBench(b, func(b *testing.B, str string) {
b.SetBytes(int64(len(str)))
data := []byte(str)
for i := 0; i < b.N; i++ {
Direction(data)
}
})
}
func BenchmarkDirectionStringASCII(b *testing.B) {
doBench(b, func(b *testing.B, str string) {
b.SetBytes(int64(len(str)))
for i := 0; i < b.N; i++ {
DirectionString(str)
}
})
}

View file

@ -203,10 +203,6 @@ func (t *Transformer) isRTL() bool {
return t.seen&isRTL != 0
}
func (t *Transformer) isFinal() bool {
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
}
// Reset implements transform.Transformer.
func (t *Transformer) Reset() { *t = Transformer{} }

View file

@ -0,0 +1,11 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build go1.10
package bidirule
func (t *Transformer) isFinal() bool {
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
}

View file

@ -0,0 +1,14 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !go1.10
package bidirule
func (t *Transformer) isFinal() bool {
if !t.isRTL() {
return true
}
return t.state == ruleLTRFinal || t.state == ruleRTLFinal || t.state == ruleInitial
}

View file

@ -1,851 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidirule
import (
"fmt"
"testing"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/bidi"
)
const (
strL = "ABC" // Left to right - most letters in LTR scripts
strR = "עברית" // Right to left - most letters in non-Arabic RTL scripts
strAL = "دبي" // Arabic letters - most letters in the Arabic script
strEN = "123" // European Number (0-9, and Extended Arabic-Indic numbers)
strES = "+-" // European Number Separator (+ and -)
strET = "$" // European Number Terminator (currency symbols, the hash sign, the percent sign and so on)
strAN = "\u0660" // Arabic Number; this encompasses the Arabic-Indic numbers, but not the Extended Arabic-Indic numbers
strCS = "," // Common Number Separator (. , / : et al)
strNSM = "\u0300" // Nonspacing Mark - most combining accents
strBN = "\u200d" // Boundary Neutral - control characters (ZWNJ, ZWJ, and others)
strB = "\u2029" // Paragraph Separator
strS = "\u0009" // Segment Separator
strWS = " " // Whitespace, including the SPACE character
strON = "@" // Other Neutrals, including @, &, parentheses, MIDDLE DOT
)
type ruleTest struct {
in string
dir bidi.Direction
n int // position at which the rule fails
err error
// For tests that split the string in two.
pSrc int // number of source bytes to consume first
szDst int // size of destination buffer
nSrc int // source bytes consumed and bytes written
err0 error // error after first run
}
var testCases = [][]ruleTest{
// Go-specific rules.
// Invalid UTF-8 is invalid.
0: []ruleTest{{
in: "",
dir: bidi.LeftToRight,
}, {
in: "\x80",
dir: bidi.LeftToRight,
err: ErrInvalid,
n: 0,
}, {
in: "\xcc",
dir: bidi.LeftToRight,
err: ErrInvalid,
n: 0,
}, {
in: "abc\x80",
dir: bidi.LeftToRight,
err: ErrInvalid,
n: 3,
}, {
in: "abc\xcc",
dir: bidi.LeftToRight,
err: ErrInvalid,
n: 3,
}, {
in: "abc\xccdef",
dir: bidi.LeftToRight,
err: ErrInvalid,
n: 3,
}, {
in: "\xccdef",
dir: bidi.LeftToRight,
err: ErrInvalid,
n: 0,
}, {
in: strR + "\x80",
dir: bidi.RightToLeft,
err: ErrInvalid,
n: len(strR),
}, {
in: strR + "\xcc",
dir: bidi.RightToLeft,
err: ErrInvalid,
n: len(strR),
}, {
in: strAL + "\xcc" + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: len(strAL),
}, {
in: "\xcc" + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 0,
}},
// Rule 2.1: The first character must be a character with Bidi property L,
// R, or AL. If it has the R or AL property, it is an RTL label; if it has
// the L property, it is an LTR label.
1: []ruleTest{{
in: strL,
dir: bidi.LeftToRight,
}, {
in: strR,
dir: bidi.RightToLeft,
}, {
in: strAL,
dir: bidi.RightToLeft,
}, {
in: strAN,
dir: bidi.RightToLeft,
err: ErrInvalid,
}, {
in: strEN,
dir: bidi.LeftToRight,
err: ErrInvalid,
n: len(strEN),
}, {
in: strES,
dir: bidi.LeftToRight,
err: ErrInvalid,
n: len(strES),
}, {
in: strET,
dir: bidi.LeftToRight,
err: ErrInvalid,
n: len(strET),
}, {
in: strCS,
dir: bidi.LeftToRight,
err: ErrInvalid,
n: len(strCS),
}, {
in: strNSM,
dir: bidi.LeftToRight,
err: ErrInvalid,
n: len(strNSM),
}, {
in: strBN,
dir: bidi.LeftToRight,
err: ErrInvalid,
n: len(strBN),
}, {
in: strB,
dir: bidi.LeftToRight,
err: ErrInvalid,
n: len(strB),
}, {
in: strS,
dir: bidi.LeftToRight,
err: ErrInvalid,
n: len(strS),
}, {
in: strWS,
dir: bidi.LeftToRight,
err: ErrInvalid,
n: len(strWS),
}, {
in: strON,
dir: bidi.LeftToRight,
err: ErrInvalid,
n: len(strON),
}, {
in: strEN + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 3,
}, {
in: strES + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 2,
}, {
in: strET + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 1,
}, {
in: strCS + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 1,
}, {
in: strNSM + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 2,
}, {
in: strBN + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 3,
}, {
in: strB + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 3,
}, {
in: strS + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 1,
}, {
in: strWS + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 1,
}, {
in: strON + strR,
dir: bidi.RightToLeft,
err: ErrInvalid,
n: 1,
}},
// Rule 2.2: In an RTL label, only characters with the Bidi properties R,
// AL, AN, EN, ES, CS, ET, ON, BN, or NSM are allowed.
2: []ruleTest{{
in: strR + strR + strAL,
dir: bidi.RightToLeft,
}, {
in: strR + strAL + strR,
dir: bidi.RightToLeft,
}, {
in: strR + strAN + strAL,
dir: bidi.RightToLeft,
}, {
in: strR + strEN + strR,
dir: bidi.RightToLeft,
}, {
in: strR + strES + strR,
dir: bidi.RightToLeft,
}, {
in: strR + strCS + strR,
dir: bidi.RightToLeft,
}, {
in: strR + strET + strAL,
dir: bidi.RightToLeft,
}, {
in: strR + strON + strR,
dir: bidi.RightToLeft,
}, {
in: strR + strBN + strR,
dir: bidi.RightToLeft,
}, {
in: strR + strNSM + strAL,
dir: bidi.RightToLeft,
}, {
in: strR + strL + strR,
dir: bidi.RightToLeft,
n: len(strR),
err: ErrInvalid,
}, {
in: strR + strB + strR,
dir: bidi.RightToLeft,
n: len(strR),
err: ErrInvalid,
}, {
in: strR + strS + strAL,
dir: bidi.RightToLeft,
n: len(strR),
err: ErrInvalid,
}, {
in: strR + strWS + strAL,
dir: bidi.RightToLeft,
n: len(strR),
err: ErrInvalid,
}, {
in: strAL + strR + strAL,
dir: bidi.RightToLeft,
}, {
in: strAL + strAL + strR,
dir: bidi.RightToLeft,
}, {
in: strAL + strAN + strAL,
dir: bidi.RightToLeft,
}, {
in: strAL + strEN + strR,
dir: bidi.RightToLeft,
}, {
in: strAL + strES + strR,
dir: bidi.RightToLeft,
}, {
in: strAL + strCS + strR,
dir: bidi.RightToLeft,
}, {
in: strAL + strET + strAL,
dir: bidi.RightToLeft,
}, {
in: strAL + strON + strR,
dir: bidi.RightToLeft,
}, {
in: strAL + strBN + strR,
dir: bidi.RightToLeft,
}, {
in: strAL + strNSM + strAL,
dir: bidi.RightToLeft,
}, {
in: strAL + strL + strR,
dir: bidi.RightToLeft,
n: len(strAL),
err: ErrInvalid,
}, {
in: strAL + strB + strR,
dir: bidi.RightToLeft,
n: len(strAL),
err: ErrInvalid,
}, {
in: strAL + strS + strAL,
dir: bidi.RightToLeft,
n: len(strAL),
err: ErrInvalid,
}, {
in: strAL + strWS + strAL,
dir: bidi.RightToLeft,
n: len(strAL),
err: ErrInvalid,
}},
// Rule 2.3: In an RTL label, the end of the label must be a character with
// Bidi property R, AL, EN, or AN, followed by zero or more characters with
// Bidi property NSM.
3: []ruleTest{{
in: strR + strNSM,
dir: bidi.RightToLeft,
}, {
in: strR + strR,
dir: bidi.RightToLeft,
}, {
in: strR + strAL + strNSM,
dir: bidi.RightToLeft,
}, {
in: strR + strEN + strNSM + strNSM,
dir: bidi.RightToLeft,
}, {
in: strR + strAN,
dir: bidi.RightToLeft,
}, {
in: strR + strES + strNSM,
dir: bidi.RightToLeft,
n: len(strR + strES + strNSM),
err: ErrInvalid,
}, {
in: strR + strCS + strNSM + strNSM,
dir: bidi.RightToLeft,
n: len(strR + strCS + strNSM + strNSM),
err: ErrInvalid,
}, {
in: strR + strET,
dir: bidi.RightToLeft,
n: len(strR + strET),
err: ErrInvalid,
}, {
in: strR + strON + strNSM,
dir: bidi.RightToLeft,
n: len(strR + strON + strNSM),
err: ErrInvalid,
}, {
in: strR + strBN + strNSM + strNSM,
dir: bidi.RightToLeft,
n: len(strR + strBN + strNSM + strNSM),
err: ErrInvalid,
}, {
in: strR + strL + strNSM,
dir: bidi.RightToLeft,
n: len(strR),
err: ErrInvalid,
}, {
in: strR + strB + strNSM + strNSM,
dir: bidi.RightToLeft,
n: len(strR),
err: ErrInvalid,
}, {
in: strR + strS,
dir: bidi.RightToLeft,
n: len(strR),
err: ErrInvalid,
}, {
in: strR + strWS,
dir: bidi.RightToLeft,
n: len(strR),
err: ErrInvalid,
}, {
in: strAL + strNSM,
dir: bidi.RightToLeft,
}, {
in: strAL + strR,
dir: bidi.RightToLeft,
}, {
in: strAL + strAL + strNSM,
dir: bidi.RightToLeft,
}, {
in: strAL + strEN + strNSM + strNSM,
dir: bidi.RightToLeft,
}, {
in: strAL + strAN,
dir: bidi.RightToLeft,
}, {
in: strAL + strES + strNSM,
dir: bidi.RightToLeft,
n: len(strAL + strES + strNSM),
err: ErrInvalid,
}, {
in: strAL + strCS + strNSM + strNSM,
dir: bidi.RightToLeft,
n: len(strAL + strCS + strNSM + strNSM),
err: ErrInvalid,
}, {
in: strAL + strET,
dir: bidi.RightToLeft,
n: len(strAL + strET),
err: ErrInvalid,
}, {
in: strAL + strON + strNSM,
dir: bidi.RightToLeft,
n: len(strAL + strON + strNSM),
err: ErrInvalid,
}, {
in: strAL + strBN + strNSM + strNSM,
dir: bidi.RightToLeft,
n: len(strAL + strBN + strNSM + strNSM),
err: ErrInvalid,
}, {
in: strAL + strL + strNSM,
dir: bidi.RightToLeft,
n: len(strAL),
err: ErrInvalid,
}, {
in: strAL + strB + strNSM + strNSM,
dir: bidi.RightToLeft,
n: len(strAL),
err: ErrInvalid,
}, {
in: strAL + strS,
dir: bidi.RightToLeft,
n: len(strAL),
err: ErrInvalid,
}, {
in: strAL + strWS,
dir: bidi.RightToLeft,
n: len(strAL),
err: ErrInvalid,
}},
// Rule 2.4: In an RTL label, if an EN is present, no AN may be present,
// and vice versa.
4: []ruleTest{{
in: strR + strEN + strAN,
dir: bidi.RightToLeft,
n: len(strR + strEN),
err: ErrInvalid,
}, {
in: strR + strAN + strEN + strNSM,
dir: bidi.RightToLeft,
n: len(strR + strAN),
err: ErrInvalid,
}, {
in: strAL + strEN + strAN,
dir: bidi.RightToLeft,
n: len(strAL + strEN),
err: ErrInvalid,
}, {
in: strAL + strAN + strEN + strNSM,
dir: bidi.RightToLeft,
n: len(strAL + strAN),
err: ErrInvalid,
}},
// Rule 2.5: In an LTR label, only characters with the Bidi properties L,
// EN, ES, CS, ET, ON, BN, or NSM are allowed.
5: []ruleTest{{
in: strL + strL + strL,
dir: bidi.LeftToRight,
}, {
in: strL + strEN + strL,
dir: bidi.LeftToRight,
}, {
in: strL + strES + strL,
dir: bidi.LeftToRight,
}, {
in: strL + strCS + strL,
dir: bidi.LeftToRight,
}, {
in: strL + strET + strL,
dir: bidi.LeftToRight,
}, {
in: strL + strON + strL,
dir: bidi.LeftToRight,
}, {
in: strL + strBN + strL,
dir: bidi.LeftToRight,
}, {
in: strL + strNSM + strL,
dir: bidi.LeftToRight,
}, {
in: strL + strR + strL,
dir: bidi.RightToLeft,
n: len(strL),
err: ErrInvalid,
}, {
in: strL + strAL + strL,
dir: bidi.RightToLeft,
n: len(strL),
err: ErrInvalid,
}, {
in: strL + strAN + strL,
dir: bidi.RightToLeft,
n: len(strL),
err: ErrInvalid,
}, {
in: strL + strB + strL,
dir: bidi.LeftToRight,
n: len(strL + strB + strL),
err: ErrInvalid,
}, {
in: strL + strB + strL + strR,
dir: bidi.RightToLeft,
n: len(strL + strB + strL),
err: ErrInvalid,
}, {
in: strL + strS + strL,
dir: bidi.LeftToRight,
n: len(strL + strS + strL),
err: ErrInvalid,
}, {
in: strL + strS + strL + strR,
dir: bidi.RightToLeft,
n: len(strL + strS + strL),
err: ErrInvalid,
}, {
in: strL + strWS + strL,
dir: bidi.LeftToRight,
n: len(strL + strWS + strL),
err: ErrInvalid,
}, {
in: strL + strWS + strL + strR,
dir: bidi.RightToLeft,
n: len(strL + strWS + strL),
err: ErrInvalid,
}},
// Rule 2.6: In an LTR label, the end of the label must be a character with
// Bidi property L or EN, followed by zero or more characters with Bidi
// property NSM.
6: []ruleTest{{
in: strL,
dir: bidi.LeftToRight,
}, {
in: strL + strNSM,
dir: bidi.LeftToRight,
}, {
in: strL + strNSM + strNSM,
dir: bidi.LeftToRight,
}, {
in: strL + strEN,
dir: bidi.LeftToRight,
}, {
in: strL + strEN + strNSM,
dir: bidi.LeftToRight,
}, {
in: strL + strEN + strNSM + strNSM,
dir: bidi.LeftToRight,
}, {
in: strL + strES,
dir: bidi.LeftToRight,
n: len(strL + strES),
err: ErrInvalid,
}, {
in: strL + strES + strR,
dir: bidi.RightToLeft,
n: len(strL + strES),
err: ErrInvalid,
}, {
in: strL + strCS,
dir: bidi.LeftToRight,
n: len(strL + strCS),
err: ErrInvalid,
}, {
in: strL + strCS + strR,
dir: bidi.RightToLeft,
n: len(strL + strCS),
err: ErrInvalid,
}, {
in: strL + strET,
dir: bidi.LeftToRight,
n: len(strL + strET),
err: ErrInvalid,
}, {
in: strL + strET + strR,
dir: bidi.RightToLeft,
n: len(strL + strET),
err: ErrInvalid,
}, {
in: strL + strON,
dir: bidi.LeftToRight,
n: len(strL + strON),
err: ErrInvalid,
}, {
in: strL + strON + strR,
dir: bidi.RightToLeft,
n: len(strL + strON),
err: ErrInvalid,
}, {
in: strL + strBN,
dir: bidi.LeftToRight,
n: len(strL + strBN),
err: ErrInvalid,
}, {
in: strL + strBN + strR,
dir: bidi.RightToLeft,
n: len(strL + strBN),
err: ErrInvalid,
}, {
in: strL + strR,
dir: bidi.RightToLeft,
n: len(strL),
err: ErrInvalid,
}, {
in: strL + strAL,
dir: bidi.RightToLeft,
n: len(strL),
err: ErrInvalid,
}, {
in: strL + strAN,
dir: bidi.RightToLeft,
n: len(strL),
err: ErrInvalid,
}, {
in: strL + strB,
dir: bidi.LeftToRight,
n: len(strL + strB),
err: ErrInvalid,
}, {
in: strL + strB + strR,
dir: bidi.RightToLeft,
n: len(strL + strB),
err: ErrInvalid,
}, {
in: strL + strS,
dir: bidi.LeftToRight,
n: len(strL + strS),
err: ErrInvalid,
}, {
in: strL + strS + strR,
dir: bidi.RightToLeft,
n: len(strL + strS),
err: ErrInvalid,
}, {
in: strL + strWS,
dir: bidi.LeftToRight,
n: len(strL + strWS),
err: ErrInvalid,
}, {
in: strL + strWS + strR,
dir: bidi.RightToLeft,
n: len(strL + strWS),
err: ErrInvalid,
}},
// Incremental processing.
9: []ruleTest{{
in: "e\u0301", // é
dir: bidi.LeftToRight,
pSrc: 2,
nSrc: 1,
err0: transform.ErrShortSrc,
}, {
in: "e\u1000f", // é
dir: bidi.LeftToRight,
pSrc: 3,
nSrc: 1,
err0: transform.ErrShortSrc,
}, {
// Remain invalid once invalid.
in: strR + "ab",
dir: bidi.RightToLeft,
n: len(strR),
err: ErrInvalid,
pSrc: len(strR) + 1,
nSrc: len(strR),
err0: ErrInvalid,
}, {
// Short destination
in: "abcdefghij",
dir: bidi.LeftToRight,
pSrc: 10,
szDst: 5,
nSrc: 5,
err0: transform.ErrShortDst,
}, {
in: "\U000102f7",
dir: bidi.LeftToRight,
n: len("\U000102f7"),
err: ErrInvalid,
}, {
// Short destination splitting input rune
in: "e\u0301",
dir: bidi.LeftToRight,
pSrc: 3,
szDst: 2,
nSrc: 1,
err0: transform.ErrShortDst,
}, {
// Unicode 10.0.0 IDNA test string.
in: "FAX\u2a77\U0001d186",
dir: bidi.LeftToRight,
n: len("FAX\u2a77\U0001d186"),
err: ErrInvalid,
}, {
in: "\x80\u0660",
dir: bidi.RightToLeft,
n: 0,
err: ErrInvalid,
}},
}
// init fills in the expected consumed length for every test case that is
// expected to succeed: a valid input is always consumed in full.
func init() {
	for rule := range testCases {
		for i := range testCases[rule] {
			if testCases[rule][i].err == nil {
				testCases[rule][i].n = len(testCases[rule][i].in)
			}
		}
	}
}
// doTests runs fn against every entry in testCases, naming each subtest by
// rule number, case index, and the (quoted and raw) input string.
func doTests(t *testing.T, fn func(t *testing.T, tc ruleTest)) {
	for rule, cases := range testCases {
		for i, tc := range cases {
			label := fmt.Sprintf("%d/%d:%+q:%s", rule, i, tc.in, tc.in)
			testtext.Run(t, label, func(t *testing.T) { fn(t, tc) })
		}
	}
}
// TestDirection checks that Direction reports the expected direction for
// every test case's byte representation.
func TestDirection(t *testing.T) {
	doTests(t, func(t *testing.T, tc ruleTest) {
		if got := Direction([]byte(tc.in)); got != tc.dir {
			t.Errorf("dir was %v; want %v", got, tc.dir)
		}
	})
}
// TestDirectionString checks that DirectionString reports the expected
// direction for every test case's string form.
func TestDirectionString(t *testing.T) {
	doTests(t, func(t *testing.T, tc ruleTest) {
		if got := DirectionString(tc.in); got != tc.dir {
			t.Errorf("dir was %v; want %v", got, tc.dir)
		}
	})
}
// TestValid checks that Valid and ValidString each report true exactly for
// the test cases that carry no expected error.
func TestValid(t *testing.T) {
	doTests(t, func(t *testing.T, tc ruleTest) {
		want := tc.err == nil
		if got := Valid([]byte(tc.in)); got != want {
			t.Fatalf("Valid: got %v; want %v", got, want)
		}
		// Fix: this message previously also said "Valid", making a
		// ValidString failure indistinguishable from a Valid failure.
		if got := ValidString(tc.in); got != want {
			t.Fatalf("ValidString: got %v; want %v", got, want)
		}
	})
}
// TestSpan exercises the incremental Span workflow: first a span over the
// first pSrc bytes (with atEOF set only when that prefix is the whole
// input), then a final span over the remainder with atEOF=true. err0/nSrc
// describe the expected intermediate state; err/n the final one.
func TestSpan(t *testing.T) {
	doTests(t, func(t *testing.T, tc ruleTest) {
		// Skip tests that test for limited destination buffer size.
		if tc.szDst > 0 {
			return
		}
		r := New()
		src := []byte(tc.in)
		// Phase 1: span the prefix.
		n, err := r.Span(src[:tc.pSrc], tc.pSrc == len(tc.in))
		if err != tc.err0 {
			t.Errorf("err0 was %v; want %v", err, tc.err0)
		}
		if n != tc.nSrc {
			t.Fatalf("nSrc was %d; want %d", n, tc.nSrc)
		}
		// Phase 2: span the rest of the input to completion; the total
		// spanned length must equal tc.n.
		n, err = r.Span(src[n:], true)
		if err != tc.err {
			t.Errorf("error was %v; want %v", err, tc.err)
		}
		if got := n + tc.nSrc; got != tc.n {
			t.Errorf("n was %d; want %d", got, tc.n)
		}
	})
}
// TestTransform exercises the incremental Transform workflow, including
// cases where the destination buffer is deliberately undersized (szDst).
// The transformer is expected to copy exactly the bytes it consumes, so
// nDst must always equal nSrc.
func TestTransform(t *testing.T) {
	doTests(t, func(t *testing.T, tc ruleTest) {
		r := New()
		src := []byte(tc.in)
		dst := make([]byte, len(tc.in))
		if tc.szDst > 0 {
			dst = make([]byte, tc.szDst)
		}

		// First transform operates on a zero-length string for most tests.
		nDst, nSrc, err := r.Transform(dst, src[:tc.pSrc], tc.pSrc == len(tc.in))
		if err != tc.err0 {
			t.Errorf("err0 was %v; want %v", err, tc.err0)
		}
		if nDst != nSrc {
			t.Fatalf("nDst (%d) and nSrc (%d) should match", nDst, nSrc)
		}
		if nSrc != tc.nSrc {
			t.Fatalf("nSrc was %d; want %d", nSrc, tc.nSrc)
		}

		// Second call finishes the input into a full-sized buffer seeded
		// with the output of the first call.
		dst1 := make([]byte, len(tc.in))
		copy(dst1, dst[:nDst])
		nDst, nSrc, err = r.Transform(dst1[nDst:], src[nSrc:], true)
		if err != tc.err {
			t.Errorf("error was %v; want %v", err, tc.err)
		}
		if nDst != nSrc {
			t.Fatalf("nDst (%d) and nSrc (%d) should match", nDst, nSrc)
		}
		n := nSrc + tc.nSrc
		if n != tc.n {
			t.Fatalf("n was %d; want %d", n, tc.n)
		}
		// The transformed output must equal the accepted prefix of the input.
		if got, want := string(dst1[:n]), tc.in[:tc.n]; got != want {
			t.Errorf("got %+q; want %+q", got, want)
		}
	})
}

View file

@ -1,37 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package transform_test
import (
"fmt"
"unicode"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// ExampleRemoveFunc demonstrates dropping runes that match a predicate.
func ExampleRemoveFunc() {
	input := []byte(`tschüß; до свидания`)
	buf := make([]byte, len(input))

	// Drop all whitespace.
	rm := transform.RemoveFunc(unicode.IsSpace)
	nDst, _, _ := rm.Transform(buf, input, true)
	fmt.Println(string(buf[:nDst]))

	// Keep only Latin-script runes.
	rm = transform.RemoveFunc(func(r rune) bool {
		return !unicode.Is(unicode.Latin, r)
	})
	nDst, _, _ = rm.Transform(buf, input, true)
	fmt.Println(string(buf[:nDst]))

	// Same filter after NFD decomposition: the combining mark of ü is
	// removed, leaving a plain u (see the last output line).
	nDst, _, _ = rm.Transform(buf, norm.NFD.Bytes(input), true)
	fmt.Println(string(buf[:nDst]))

	// Output:
	// tschüß;досвидания
	// tschüß
	// tschuß
}

File diff suppressed because it is too large Load diff

View file

@ -1,224 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidi
import (
"flag"
"fmt"
"log"
"strconv"
"strings"
"testing"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/norm"
)
// testLevels enables the embedding-level checks in the Bidi conformance
// tests in addition to the always-on reordering checks.
var testLevels = flag.Bool("levels", false, "enable testing of levels")
// TestBidiCore performs the tests in BidiTest.txt.
// See http://www.unicode.org/Public/UCD/latest/ucd/BidiTest.txt.
//
// Each data entry lists a sequence of Bidi classes plus a bitset (field 1)
// of the paragraph levels to test it under; the expected level and reorder
// strings come from the preceding "Levels:"/"Reorder:" part lines.
func TestBidiCore(t *testing.T) {
	testtext.SkipIfNotLong(t)

	r := gen.OpenUCDFile("BidiTest.txt")
	defer r.Close()

	var wantLevels, wantOrder []string
	p := ucd.New(r, ucd.Part(func(p *ucd.Parser) {
		s := strings.Split(p.String(0), ":")
		switch s[0] {
		case "Levels":
			wantLevels = strings.Fields(s[1])
		case "Reorder":
			wantOrder = strings.Fields(s[1])
		default:
			log.Fatalf("Unknown part %q.", s[0])
		}
	}))
	for p.Next() {
		types := []Class{}
		for _, s := range p.Strings(0) {
			types = append(types, bidiClass[s])
		}
		// We ignore the bracketing part of the algorithm.
		pairTypes := make([]bracketType, len(types))
		pairValues := make([]rune, len(types))
		for i := uint(0); i < 3; i++ {
			if p.Uint(1)&(1<<i) == 0 {
				continue
			}
			// Bit i selects paragraph level i-1; -1 presumably selects the
			// implicit (auto) level — TODO confirm against level semantics.
			lev := level(int(i) - 1)
			par := newParagraph(types, pairTypes, pairValues, lev)
			if *testLevels {
				levels := par.getLevels([]int{len(types)})
				for i, s := range wantLevels {
					// "x" marks positions excluded from level comparison.
					if s == "x" {
						continue
					}
					l, _ := strconv.ParseUint(s, 10, 8)
					// Only the parity (even=LTR, odd=RTL) is compared here.
					if level(l)&1 != levels[i]&1 {
						t.Errorf("%s:%d:levels: got %v; want %v", p.String(0), lev, levels, wantLevels)
						break
					}
				}
			}
			order := par.getReordering([]int{len(types)})
			gotOrder := filterOrder(types, order)
			if got, want := fmt.Sprint(gotOrder), fmt.Sprint(wantOrder); got != want {
				t.Errorf("%s:%d:order: got %v; want %v\noriginal %v", p.String(0), lev, got, want, order)
			}
		}
	}
	if err := p.Err(); err != nil {
		log.Fatal(err)
	}
}
// removeClasses lists the Bidi classes whose positions the conformance data
// treats as removed ("x" entries): the explicit embedding/override
// formatting codes and boundary-neutral characters.
var removeClasses = map[Class]bool{
	LRO: true,
	RLO: true,
	RLE: true,
	LRE: true,
	PDF: true,
	BN:  true,
}
// TestBidiCharacters performs the tests in BidiCharacterTest.txt.
// See http://www.unicode.org/Public/UCD/latest/ucd/BidiCharacterTest.txt
//
// Unlike TestBidiCore, the input here is real runes, so classes and bracket
// pairs are derived via LookupRune before running the paragraph algorithm.
func TestBidiCharacters(t *testing.T) {
	testtext.SkipIfNotLong(t)

	ucd.Parse(gen.OpenUCDFile("BidiCharacterTest.txt"), func(p *ucd.Parser) {
		var (
			types      []Class
			pairTypes  []bracketType
			pairValues []rune
			parLevel   level

			wantLevel       = level(p.Int(2))
			wantLevels      = p.Strings(3)
			wantVisualOrder = p.Strings(4)
		)
		// Field 1: 0/1 are explicit paragraph levels; 2 requests the
		// implicit level.
		switch l := p.Int(1); l {
		case 0, 1:
			parLevel = level(l)
		case 2:
			parLevel = implicitLevel
		default:
			// Spec says to ignore unknown parts.
		}
		runes := p.Runes(0)
		for _, r := range runes {
			// Assign the bracket type.
			// Canonical-equivalent brackets are mapped to their NFKD base
			// rune before lookup.
			if d := norm.NFKD.PropertiesString(string(r)).Decomposition(); d != nil {
				r = []rune(string(d))[0]
			}
			p, _ := LookupRune(r)
			// Assign the class for this rune.
			types = append(types, p.Class())
			switch {
			case !p.IsBracket():
				pairTypes = append(pairTypes, bpNone)
				pairValues = append(pairValues, 0)
			case p.IsOpeningBracket():
				pairTypes = append(pairTypes, bpOpen)
				pairValues = append(pairValues, r)
			default:
				pairTypes = append(pairTypes, bpClose)
				pairValues = append(pairValues, p.reverseBracket(r))
			}
		}
		par := newParagraph(types, pairTypes, pairValues, parLevel)
		// Test results:
		if got := par.embeddingLevel; got != wantLevel {
			t.Errorf("%v:level: got %d; want %d", string(runes), got, wantLevel)
		}
		if *testLevels {
			gotLevels := getLevelStrings(types, par.getLevels([]int{len(types)}))
			if got, want := fmt.Sprint(gotLevels), fmt.Sprint(wantLevels); got != want {
				t.Errorf("%04X %q:%d: got %v; want %v\nval: %x\npair: %v", runes, string(runes), parLevel, got, want, pairValues, pairTypes)
			}
		}
		order := par.getReordering([]int{len(types)})
		order = filterOrder(types, order)
		if got, want := fmt.Sprint(order), fmt.Sprint(wantVisualOrder); got != want {
			t.Errorf("%04X %q:%d: got %v; want %v\ngot order: %s", runes, string(runes), parLevel, got, want, reorder(runes, order))
		}
	})
}
// getLevelStrings renders the computed embedding levels in the format of
// the conformance data: one decimal string per position, with "x" for
// positions whose class is in removeClasses.
func getLevelStrings(cl []Class, levels []level) []string {
	var out []string
	for i, lev := range levels {
		if removeClasses[cl[i]] {
			out = append(out, "x")
		} else {
			out = append(out, fmt.Sprint(lev))
		}
	}
	return out
}
// filterOrder drops from a reordering result every index whose class is in
// removeClasses, mirroring the removals applied to the expected data.
func filterOrder(cl []Class, order []int) []int {
	kept := []int{}
	for _, idx := range order {
		if removeClasses[cl[idx]] {
			continue
		}
		kept = append(kept, idx)
	}
	return kept
}
// reorder permutes r according to order and returns the resulting text:
// position i of the output holds r[order[i]].
func reorder(r []rune, order []int) string {
	var b strings.Builder
	for _, src := range order {
		b.WriteRune(r[src])
	}
	return b.String()
}
// bidiClass names and codes taken from class "bc" in
// http://www.unicode.org/Public/8.0.0/ucd/PropertyValueAliases.txt
//
// It maps the UCD short class names, as they appear in the conformance data
// files, to this package's Class constants.
var bidiClass = map[string]Class{
	"AL":  AL,  // classArabicLetter,
	"AN":  AN,  // classArabicNumber,
	"B":   B,   // classParagraphSeparator,
	"BN":  BN,  // classBoundaryNeutral,
	"CS":  CS,  // classCommonSeparator,
	"EN":  EN,  // classEuropeanNumber,
	"ES":  ES,  // classEuropeanSeparator,
	"ET":  ET,  // classEuropeanTerminator,
	"L":   L,   // classLeftToRight,
	"NSM": NSM, // classNonspacingMark,
	"ON":  ON,  // classOtherNeutral,
	"R":   R,   // classRightToLeft,
	"S":   S,   // classSegmentSeparator,
	"WS":  WS,  // classWhiteSpace,
	"LRO": LRO, // classLeftToRightOverride,
	"RLO": RLO, // classRightToLeftOverride,
	"LRE": LRE, // classLeftToRightEmbedding,
	"RLE": RLE, // classRightToLeftEmbedding,
	"PDF": PDF, // classPopDirectionalFormat,
	"LRI": LRI, // classLeftToRightIsolate,
	"RLI": RLI, // classRightToLeftIsolate,
	"FSI": FSI, // classFirstStrongIsolate,
	"PDI": PDI, // classPopDirectionalIsolate,
}

View file

@ -59,7 +59,7 @@ func genTables() {
log.Fatalf("Too many Class constants (%#x > 0x0F).", numClass)
}
w := gen.NewCodeWriter()
defer w.WriteGoFile(*outputFile, "bidi")
defer w.WriteVersionedGoFile(*outputFile, "bidi")
gen.WriteUnicodeVersion(w)

View file

@ -1,53 +0,0 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
package bidi
import (
"unicode"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/ucd"
"golang.org/x/text/unicode/rangetable"
)
// These tables are hand-extracted from:
// http://www.unicode.org/Public/8.0.0/ucd/extracted/DerivedBidiClass.txt

// visitDefaults invokes fn for every rune whose Bidi class comes from a
// default rule rather than an explicit UCD entry: the Arabic-letter and
// right-to-left default blocks, currency symbols (ET), and noncharacter /
// default-ignorable code points (BN).
func visitDefaults(fn func(r rune, c Class)) {
	// first write default values for ranges listed above.
	visitRunes(fn, AL, []rune{
		0x0600, 0x07BF, // Arabic
		0x08A0, 0x08FF, // Arabic Extended-A
		0xFB50, 0xFDCF, // Arabic Presentation Forms
		0xFDF0, 0xFDFF,
		0xFE70, 0xFEFF,
		0x0001EE00, 0x0001EEFF, // Arabic Mathematical Alpha Symbols
	})
	visitRunes(fn, R, []rune{
		0x0590, 0x05FF, // Hebrew
		0x07C0, 0x089F, // Nko et al.
		0xFB1D, 0xFB4F,
		0x00010800, 0x00010FFF, // Cypriot Syllabary et. al.
		0x0001E800, 0x0001EDFF,
		0x0001EF00, 0x0001EFFF,
	})
	visitRunes(fn, ET, []rune{ // European Terminator
		0x20A0, 0x20Cf, // Currency symbols
	})
	rangetable.Visit(unicode.Noncharacter_Code_Point, func(r rune) {
		fn(r, BN) // Boundary Neutral
	})
	ucd.Parse(gen.OpenUCDFile("DerivedCoreProperties.txt"), func(p *ucd.Parser) {
		if p.String(1) == "Default_Ignorable_Code_Point" {
			fn(p.Rune(0), BN) // Boundary Neutral
		}
	})
}
// visitRunes invokes fn with class c for every rune in the inclusive
// ranges encoded in runes as consecutive (lo, hi) pairs.
func visitRunes(fn func(r rune, c Class), c Class, runes []rune) {
	for i := 0; i < len(runes); i += 2 {
		for r := runes[i]; r <= runes[i+1]; r++ {
			fn(r, c)
		}
	}
}

View file

@ -1,5 +1,7 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// +build go1.10
package bidi
// UnicodeVersion is the Unicode version from which the tables in this package are derived.

1781
vendor/golang.org/x/text/unicode/bidi/tables9.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,82 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bidi
import (
"testing"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/internal/ucd"
)
// labels maps each Class constant (used as the slice index) to its UCD
// short name, for use in test failure messages.
var labels = []string{
	AL:  "AL",
	AN:  "AN",
	B:   "B",
	BN:  "BN",
	CS:  "CS",
	EN:  "EN",
	ES:  "ES",
	ET:  "ET",
	L:   "L",
	NSM: "NSM",
	ON:  "ON",
	R:   "R",
	S:   "S",
	WS:  "WS",
	LRO: "LRO",
	RLO: "RLO",
	LRE: "LRE",
	RLE: "RLE",
	PDF: "PDF",
	LRI: "LRI",
	RLI: "RLI",
	FSI: "FSI",
	PDI: "PDI",
}
// TestTables validates the generated bidi tables against the UCD source
// data: bracket pairs from BidiBrackets.txt, classes from
// extracted/DerivedBidiClass.txt, and the hand-coded defaults for all
// remaining runes.
func TestTables(t *testing.T) {
	testtext.SkipIfNotLong(t)

	// Every bracket must map to its paired bracket via reverseBracket.
	ucd.Parse(gen.OpenUCDFile("BidiBrackets.txt"), func(p *ucd.Parser) {
		r1 := p.Rune(0)
		want := p.Rune(1)
		e, _ := LookupRune(r1)
		if got := e.reverseBracket(r1); got != want {
			t.Errorf("Reverse(%U) = %U; want %U", r1, got, want)
		}
	})

	done := map[rune]bool{}
	// test checks that all three lookup entry points agree on the class
	// and consumed size for rune r.
	test := func(name string, r rune, want string) {
		str := string(r)
		e, _ := LookupString(str)
		if got := labels[e.Class()]; got != want {
			t.Errorf("%s:%U: got %s; want %s", name, r, got, want)
		}
		// Fix: the arguments were previously passed as (e2, e, sz, ...),
		// pairing "got"/"want" with the wrong values (and feeding %d a
		// Properties value); got is (e2, sz) and want is (e, len(str)).
		if e2, sz := LookupRune(r); e != e2 || sz != len(str) {
			t.Errorf("LookupRune(%U) = %v, %d; want %v, %d", r, e2, sz, e, len(str))
		}
		if e2, sz := Lookup([]byte(str)); e != e2 || sz != len(str) {
			t.Errorf("Lookup(%U) = %v, %d; want %v, %d", r, e2, sz, e, len(str))
		}
		done[r] = true
	}

	// Insert the derived BiDi properties.
	ucd.Parse(gen.OpenUCDFile("extracted/DerivedBidiClass.txt"), func(p *ucd.Parser) {
		r := p.Rune(0)
		test("derived", r, p.String(1))
	})
	// Runes not covered by the derived data fall back to the defaults.
	visitDefaults(func(r rune, c Class) {
		if !done[r] {
			test("default", r, labels[c])
		}
	})
}

View file

@ -1,27 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import "testing"
// TestParseDraft verifies ParseDraft on every recognized draft status, the
// empty-string default, and an unrecognized value.
func TestParseDraft(t *testing.T) {
	cases := []struct {
		in      string
		draft   Draft
		wantErr bool
	}{
		{"unconfirmed", Unconfirmed, false},
		{"provisional", Provisional, false},
		{"contributed", Contributed, false},
		{"approved", Approved, false},
		{"", Approved, false},
		{"foo", Approved, true},
	}
	for _, c := range cases {
		d, err := ParseDraft(c.in)
		if d != c.draft || (err != nil) != c.wantErr {
			t.Errorf("%q: was %v, %v; want %v, %v", c.in, d, err != nil, c.draft, c.wantErr)
		}
	}
}

View file

@ -1,275 +0,0 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"fmt"
"strings"
"testing"
)
// A recorder implements the RuleProcessor interface, whereby its methods
// simply record the invocations.
type recorder struct {
	calls []string
}

// Reset records a reset to anchor, rejecting before values above 5.
func (r *recorder) Reset(anchor string, before int) error {
	if before > 5 {
		return fmt.Errorf("before %d > 5", before)
	}
	r.calls = append(r.calls, fmt.Sprintf("R:%s-%d", anchor, before))
	return nil
}

// Insert records an ordering insertion, appending "|context" and "/extend"
// only when those parts are non-empty.
func (r *recorder) Insert(level int, str, context, extend string) error {
	call := fmt.Sprintf("O:%d:%s", level, str)
	if context != "" {
		call = call + "|" + context
	}
	if extend != "" {
		call = call + "/" + extend
	}
	r.calls = append(r.calls, call)
	return nil
}

// Index records an index entry.
func (r *recorder) Index(id string) {
	r.calls = append(r.calls, "I:"+id)
}

// Error records a processing error.
func (r *recorder) Error(err error) {
	r.calls = append(r.calls, fmt.Sprintf("E:%v", err))
}
// TestRuleProcessor feeds tailoring rule text through Collation.Process and
// compares the recorded RuleProcessor invocations (see recorder above)
// against the expected call log in out, one call per line. Raw-string test
// data below is deliberately left byte-for-byte untouched.
func TestRuleProcessor(t *testing.T) {
for _, tt := range []struct {
desc string
in string
out string
}{
{desc: "empty"},
{desc: "whitespace and comments only",
in: `
# adsfads
# adfadf
`,
},
{
desc: "reset anchor",
in: `
& a
&b #
& [ before 3 ] c
& [before 4] d & ee
& [first tertiary ignorable]
&'g'
& 'h''h'h'h'
&'\u0069' # LATIN SMALL LETTER I
`,
out: `
R:a-0
R:b-0
R:c-3
R:d-4
R:ee-0
R:<first tertiary ignorable/>-0
R:g-0
R:hhhh-0
R:i-0
`,
},
{
desc: "ordering",
in: `
& 0
< 1 <<''2#
<<< 3'3''33'3#
<<<<4
= 5 << 6 | s
<<<< 7 / z
<< 8'' | s / ch
`,
out: `
R:0-0
O:1:1
O:2:'2
O:3:33333
O:4:4
O:5:5
O:2:6|s
O:4:7/z
O:2:8'|s/ch
`,
},
{
desc: "index",
in: "< '\ufdd0'A",
out: "I:A",
},
{
desc: "sequence",
in: `
& 0
<<* 1234
<* a-cde-f
=* q-q
`,
out: `
R:0-0
O:2:1
O:2:2
O:2:3
O:2:4
O:1:a
O:1:b
O:1:c
O:1:d
O:1:e
O:1:f
O:5:q
`,
},
{
desc: "compact",
in: "&B<t<<<T<s<<<S<e<<<E",
out: `
R:B-0
O:1:t
O:3:T
O:1:s
O:3:S
O:1:e
O:3:E
`,
},
{
desc: "err operator",
in: "a",
out: "E:1: illegal operator 'a'",
},
{
desc: "err line number",
in: `& a
<< b
a`,
out: `
R:a-0
O:2:b
E:3: illegal operator 'a'`,
},
{
desc: "err empty anchor",
in: " & ",
out: "E:1: missing string",
},
{
desc: "err anchor invalid special 1",
in: " & [ foo ",
out: "E:1: unmatched bracket",
},
{
desc: "err anchor invalid special 2",
in: "&[",
out: "E:1: unmatched bracket",
},
{
desc: "err anchor invalid before 1",
in: "&[before a]",
out: `E:1: strconv.ParseUint: parsing "a": invalid syntax`,
},
{
desc: "err anchor invalid before 2",
in: "&[before 12]",
out: `E:1: strconv.ParseUint: parsing "12": value out of range`,
},
{
desc: "err anchor invalid before 3",
in: "&[before 2]",
out: "E:1: missing string",
},
{
desc: "err anchor invalid before 4",
in: "&[before 6] a",
out: "E:1: before 6 > 5",
},
{
desc: "err empty order",
in: " < ",
out: "E:1: missing string",
},
{
desc: "err empty identity",
in: " = ",
out: "E:1: missing string",
},
{
desc: "err empty context",
in: " < a | ",
out: "E:1: missing string after context",
},
{
desc: "err empty extend",
in: " < a / ",
out: "E:1: missing string after extension",
},
{
desc: "err empty sequence",
in: " <* ",
out: "E:1: empty sequence",
},
{
desc: "err sequence 1",
in: " <* -a",
out: "E:1: range without starter value",
},
{
desc: "err sequence 3",
in: " <* a-a-b",
out: `O:1:a
E:1: range without starter value
`,
},
// NOTE(review): "err sequence 3" is duplicated; the previous case was
// probably meant to be named "err sequence 2".
{
desc: "err sequence 3",
in: " <* b-a",
out: `O:1:b
E:1: invalid range 'b'-'a'
`,
},
{
desc: "err unmatched quote",
in: " < 'b",
out: ` E:1: unmatched single quote
`,
},
} {
// Run the rule text through the processor and compare the recorded
// calls, line by line (whitespace-trimmed), against the expectation.
rec := &recorder{}
err := Collation{
Cr: []*Common{
{hidden: hidden{CharData: tt.in}},
},
}.Process(rec)
if err != nil {
rec.Error(err)
}
got := rec.calls
want := strings.Split(strings.TrimSpace(tt.out), "\n")
if tt.out == "" {
want = nil
}
if len(got) != len(want) {
t.Errorf("%s: nResults: got %d; want %d", tt.desc, len(got), len(want))
continue
}
for i, g := range got {
if want := strings.TrimSpace(want[i]); g != want {
t.Errorf("%s:%d: got %q; want %q", tt.desc, i, g, want)
}
}
}
}

View file

@ -1,186 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
// This file contains test data.
import (
"io"
"strings"
)
// testLoader serves the embedded test XML documents to the CLDR decoder.
type testLoader struct{}

// Len reports the number of embedded test files.
func (testLoader) Len() int { return len(testFiles) }

// Path returns the CLDR-relative path of file i.
func (testLoader) Path(i int) string { return testPaths[i] }

// Reader opens file i as an io.ReadCloser backed by memory.
func (testLoader) Reader(i int) (io.ReadCloser, error) {
	return &reader{*strings.NewReader(testFiles[i])}, nil
}
// reader adds a dummy Close method to strings.Reader so that it satisfies
// the io.ReadCloser interface.
type reader struct {
	strings.Reader
}

// Close is a no-op: the underlying data lives in memory.
func (reader) Close() error { return nil }
var (
	// testFiles holds the XML documents served by testLoader; it is
	// index-parallel to testPaths.
	testFiles = []string{de_xml, gsw_xml, root_xml}
	testPaths = []string{
		"common/main/de.xml",
		"common/main/gsw.xml",
		"common/main/root.xml",
	}
)
// root_xml is the root-locale fixture: exercises aliases between calendars
// and between month contexts.
var root_xml = `<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<ldml>
<identity>
<language type="root"/>
<generation date="now"/>
</identity>
<characters>
<exemplarCharacters>[]</exemplarCharacters>
<exemplarCharacters type="auxiliary">[]</exemplarCharacters>
<exemplarCharacters type="punctuation">[\- ' &quot; \&amp; #]</exemplarCharacters>
<ellipsis type="final">{0}</ellipsis>
<ellipsis type="initial">{0}</ellipsis>
<moreInformation>?</moreInformation>
</characters>
<dates>
<calendars>
<default choice="gregorian"/>
<calendar type="buddhist">
<months>
<alias source="locale" path="../../calendar[@type='gregorian']/months"/>
</months>
</calendar>
<calendar type="chinese">
<months>
<alias source="locale" path="../../calendar[@type='gregorian']/months"/>
</months>
</calendar>
<calendar type="gregorian">
<months>
<default choice="format"/>
<monthContext type="format">
<default choice="wide"/>
<monthWidth type="narrow">
<alias source="locale" path="../../monthContext[@type='stand-alone']/monthWidth[@type='narrow']"/>
</monthWidth>
<monthWidth type="wide">
<month type="1">11</month>
<month type="2">22</month>
<month type="3">33</month>
<month type="4">44</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="narrow">
<month type="1">1</month>
<month type="2">2</month>
<month type="3">3</month>
<month type="4">4</month>
</monthWidth>
<monthWidth type="wide">
<alias source="locale" path="../../monthContext[@type='format']/monthWidth[@type='wide']"/>
</monthWidth>
</monthContext>
</months>
</calendar>
</calendars>
</dates>
</ldml>
`

// de_xml is the German fixture: overrides a subset of root's data and adds
// posix messages that gsw aliases to.
var de_xml = `<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<ldml>
<identity>
<language type="de"/>
</identity>
<characters>
<exemplarCharacters>[a ä b c d e ö p q r s ß t u ü v w x y z]</exemplarCharacters>
<exemplarCharacters type="auxiliary">[á à ă]</exemplarCharacters>
<exemplarCharacters type="index">[A B C D E F G H Z]</exemplarCharacters>
<ellipsis type="final">{0} </ellipsis>
<ellipsis type="initial"> {0}</ellipsis>
<moreInformation>?</moreInformation>
<stopwords>
<stopwordList type="collation" draft="provisional">der die das</stopwordList>
</stopwords>
</characters>
<dates>
<calendars>
<calendar type="buddhist">
<months>
<monthContext type="format">
<monthWidth type="narrow">
<month type="3">BBB</month>
</monthWidth>
<monthWidth type="wide">
<month type="3">bbb</month>
</monthWidth>
</monthContext>
</months>
</calendar>
<calendar type="gregorian">
<months>
<monthContext type="format">
<monthWidth type="narrow">
<month type="3">M</month>
<month type="4">A</month>
</monthWidth>
<monthWidth type="wide">
<month type="3">Maerz</month>
<month type="4">April</month>
<month type="5">Mai</month>
</monthWidth>
</monthContext>
<monthContext type="stand-alone">
<monthWidth type="narrow">
<month type="3">m</month>
<month type="5">m</month>
</monthWidth>
<monthWidth type="wide">
<month type="4">april</month>
<month type="5">mai</month>
</monthWidth>
</monthContext>
</months>
</calendar>
</calendars>
</dates>
<posix>
<messages>
<yesstr>yes:y</yesstr>
<nostr>no:n</nostr>
</messages>
</posix>
</ldml>
`

// gsw_xml is the Swiss German fixture: aliases its posix block to de.
var gsw_xml = `<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE ldml SYSTEM "../../common/dtd/ldml.dtd">
<ldml>
<identity>
<language type="gsw"/>
</identity>
<posix>
<alias source="de" path="//ldml/posix"/>
</posix>
</ldml>
`

View file

@ -1,21 +0,0 @@
package cldr_test
import (
"fmt"
"golang.org/x/text/unicode/cldr"
)
// ExampleSlice demonstrates filtering a slice of collation elements in
// place with MakeSlice.
func ExampleSlice() {
	var dr *cldr.CLDR // assume this is initialized

	x, _ := dr.LDML("en")
	collations := x.Collations.Collation
	// remove all but the default
	keep := func(e cldr.Elem) bool {
		return e.GetCommon().Type != x.Collations.Default()
	}
	cldr.MakeSlice(&collations).Filter(keep)
	for i, c := range collations {
		fmt.Println(i, c.Type)
	}
}

View file

@ -1,368 +0,0 @@
package cldr
import (
"fmt"
"log"
"reflect"
"testing"
)
// failOnError aborts (via log.Panic) when err is non-nil; the fixtures
// below use it to bail out on unexpected decode failures.
func failOnError(err error) {
	if err == nil {
		return
	}
	log.Panic(err)
}
// data decodes the embedded test CLDR XML and returns the result, aborting
// on failure.
func data() *CLDR {
	var d Decoder
	decoded, err := d.Decode(testLoader{})
	failOnError(err)
	return decoded
}
// h is an embedded helper struct exercising attribute ("ha"), element
// ("he"), chardata, and untagged fields.
type h struct {
	A string `xml:"ha,attr"`
	E string `xml:"he"`
	D string `xml:",chardata"`
	X string
}

// fieldTest mixes Common with attribute, element, chardata, untagged, and
// embedded fields — the shapes the reflection walkers must handle.
type fieldTest struct {
	Common
	To  string `xml:"to,attr"`
	Key string `xml:"key,attr"`
	E   string `xml:"e"`
	D   string `xml:",chardata"`
	X   string
	h
}

// testStruct is the shared fixture for the iteration, field-lookup, and
// key tests below.
var testStruct = fieldTest{
	Common: Common{
		name: "mapping", // exclude "type" as distinguishing attribute
		Type: "foo",
		Alt:  "foo",
	},
	To:  "nyc",
	Key: "k",
	E:   "E",
	D:   "D",
	h: h{
		A: "A",
		E: "E",
		D: "D",
	},
}
// TestIter walks testStruct with iter and checks that a sample of visited
// fields carries the expected (stringified) values; each expected entry
// must be seen exactly once.
func TestIter(t *testing.T) {
	tests := map[string]string{
		"Type":  "foo",
		"Alt":   "foo",
		"To":    "nyc",
		"A":     "A",
		"Alias": "<nil>",
	}
	for i := iter(reflect.ValueOf(testStruct)); !i.done(); i.next() {
		v := i.value()
		// Dereference *string fields so they compare like plain strings.
		if v.Kind() == reflect.Ptr && v.Elem().Kind() == reflect.String {
			v = v.Elem()
		}
		name := i.field().Name
		if want, ok := tests[name]; ok {
			got := fmt.Sprint(v.Interface())
			if got != want {
				// Fix: the arguments were previously swapped, reporting
				// the expected value as "found" and vice versa. (The dead
				// counter k was also removed.)
				t.Errorf("value: found %q; want %q", got, want)
			}
			delete(tests, name)
		}
	}
	if len(tests) != 0 {
		t.Errorf("missing fields: %v", tests)
	}
}
// TestFindField looks up fields of testStruct by their XML name (attribute,
// element, or embedded) and checks both presence and value.
func TestFindField(t *testing.T) {
	tests := []struct {
		name, val string
		exist     bool
	}{
		{"type", "foo", true},
		{"alt", "foo", true},
		{"to", "nyc", true},
		{"he", "E", true},
		{"q", "", false},
	}
	vf := reflect.ValueOf(testStruct)
	for i, tt := range tests {
		v, err := findField(vf, tt.name)
		if (err == nil) != tt.exist {
			t.Errorf("%d: field %q present is %v; want %v", i, tt.name, err == nil, tt.exist)
		} else if tt.exist {
			// Pointer fields: skip nil, otherwise compare the pointee.
			if v.Kind() == reflect.Ptr {
				if v.IsNil() {
					continue
				}
				v = v.Elem()
			}
			if v.String() != tt.val {
				t.Errorf("%d: found value %q; want %q", i, v.String(), tt.val)
			}
		}
	}
}
// keyTests pairs attribute-exclusion lists with the distinguishing key
// expected for testStruct; shared by TestAttrKey and TestKey.
var keyTests = []struct {
	exclude []string
	key     string
}{
	{[]string{}, "alt=foo;key=k;to=nyc"},
	{[]string{"type"}, "alt=foo;key=k;to=nyc"},
	{[]string{"choice"}, "alt=foo;key=k;to=nyc"},
	{[]string{"alt"}, "key=k;to=nyc"},
	{[]string{"a"}, "alt=foo;key=k;to=nyc"},
	{[]string{"to"}, "alt=foo;key=k"},
	{[]string{"alt", "to"}, "key=k"},
	{[]string{"alt", "to", "key"}, ""},
}
// TestAttrKey checks attrKey against every exclusion list in keyTests.
func TestAttrKey(t *testing.T) {
	v := reflect.ValueOf(&testStruct)
	for i, tt := range keyTests {
		if got := attrKey(v, tt.exclude...); got != tt.key {
			t.Errorf("%d: found %q, want %q", i, got, tt.key)
		}
	}
}
// TestKey checks the exported Key wrapper against the same keyTests table.
func TestKey(t *testing.T) {
	for i, tt := range keyTests {
		if got := Key(&testStruct, tt.exclude...); got != tt.key {
			t.Errorf("%d: found %q, want %q", i, got, tt.key)
		}
	}
}
func testEnclosing(t *testing.T, x *LDML, name string) {
eq := func(a, b Elem, i int) {
for ; i > 0; i-- {
b = b.enclosing()
}
if a != b {
t.Errorf("%s: found path %q, want %q", name, getPath(a), getPath(b))
}
}
eq(x, x, 0)
eq(x, x.Identity, 1)
eq(x, x.Dates.Calendars, 2)
eq(x, x.Dates.Calendars.Calendar[0], 3)
eq(x, x.Dates.Calendars.Calendar[1], 3)
//eq(x, x.Dates.Calendars.Calendar[0].Months, 4)
eq(x, x.Dates.Calendars.Calendar[1].Months, 4)
}
func TestEnclosing(t *testing.T) {
testEnclosing(t, data().RawLDML("de"), "enclosing-raw")
de, _ := data().LDML("de")
testEnclosing(t, de, "enclosing")
}
func TestDeepCopy(t *testing.T) {
eq := func(have, want string) {
if have != want {
t.Errorf("found %q; want %q", have, want)
}
}
x, _ := data().LDML("de")
vc := deepCopy(reflect.ValueOf(x))
c := vc.Interface().(*LDML)
linkEnclosing(nil, c)
if x == c {
t.Errorf("did not copy")
}
eq(c.name, "ldml")
eq(c.Dates.name, "dates")
testEnclosing(t, c, "deepCopy")
}
type getTest struct {
loc string
path string
field string // used in combination with length
data string
altData string // used for buddhist calendar if value != ""
typ string
length int
missing bool
}
const (
budMon = "dates/calendars/calendar[@type='buddhist']/months/"
chnMon = "dates/calendars/calendar[@type='chinese']/months/"
greMon = "dates/calendars/calendar[@type='gregorian']/months/"
)
func monthVal(path, context, width string, month int) string {
const format = "%s/monthContext[@type='%s']/monthWidth[@type='%s']/month[@type='%d']"
return fmt.Sprintf(format, path, context, width, month)
}
var rootGetTests = []getTest{
{loc: "root", path: "identity/language", typ: "root"},
{loc: "root", path: "characters/moreInformation", data: "?"},
{loc: "root", path: "characters", field: "exemplarCharacters", length: 3},
{loc: "root", path: greMon, field: "monthContext", length: 2},
{loc: "root", path: greMon + "monthContext[@type='format']/monthWidth[@type='narrow']", field: "month", length: 4},
{loc: "root", path: greMon + "monthContext[@type='stand-alone']/monthWidth[@type='wide']", field: "month", length: 4},
// unescaping character data
{loc: "root", path: "characters/exemplarCharacters[@type='punctuation']", data: `[\- — … ' " “ „ \& #]`},
// default resolution
{loc: "root", path: "dates/calendars/calendar", typ: "gregorian"},
// alias resolution
{loc: "root", path: budMon, field: "monthContext", length: 2},
// crossing but non-circular alias resolution
{loc: "root", path: budMon + "monthContext[@type='format']/monthWidth[@type='narrow']", field: "month", length: 4},
{loc: "root", path: budMon + "monthContext[@type='stand-alone']/monthWidth[@type='wide']", field: "month", length: 4},
{loc: "root", path: monthVal(greMon, "format", "wide", 1), data: "11"},
{loc: "root", path: monthVal(greMon, "format", "narrow", 2), data: "2"},
{loc: "root", path: monthVal(greMon, "stand-alone", "wide", 3), data: "33"},
{loc: "root", path: monthVal(greMon, "stand-alone", "narrow", 4), data: "4"},
{loc: "root", path: monthVal(budMon, "format", "wide", 1), data: "11"},
{loc: "root", path: monthVal(budMon, "format", "narrow", 2), data: "2"},
{loc: "root", path: monthVal(budMon, "stand-alone", "wide", 3), data: "33"},
{loc: "root", path: monthVal(budMon, "stand-alone", "narrow", 4), data: "4"},
}
// 19
var deGetTests = []getTest{
{loc: "de", path: "identity/language", typ: "de"},
{loc: "de", path: "posix", length: 2},
{loc: "de", path: "characters", field: "exemplarCharacters", length: 4},
{loc: "de", path: "characters/exemplarCharacters[@type='auxiliary']", data: `[á à ă]`},
// identity is a blocking element, so de should not inherit generation from root.
{loc: "de", path: "identity/generation", missing: true},
// default resolution
{loc: "root", path: "dates/calendars/calendar", typ: "gregorian"},
// absolute path alias resolution
{loc: "gsw", path: "posix", field: "messages", length: 1},
{loc: "gsw", path: "posix/messages/yesstr", data: "yes:y"},
}
// 27(greMon) - 52(budMon) - 77(chnMon)
func calGetTests(s string) []getTest {
tests := []getTest{
{loc: "de", path: s, length: 2},
{loc: "de", path: s + "monthContext[@type='format']/monthWidth[@type='wide']", field: "month", length: 5},
{loc: "de", path: monthVal(s, "format", "wide", 1), data: "11"},
{loc: "de", path: monthVal(s, "format", "wide", 2), data: "22"},
{loc: "de", path: monthVal(s, "format", "wide", 3), data: "Maerz", altData: "bbb"},
{loc: "de", path: monthVal(s, "format", "wide", 4), data: "April"},
{loc: "de", path: monthVal(s, "format", "wide", 5), data: "Mai"},
{loc: "de", path: s + "monthContext[@type='format']/monthWidth[@type='narrow']", field: "month", length: 5},
{loc: "de", path: monthVal(s, "format", "narrow", 1), data: "1"},
{loc: "de", path: monthVal(s, "format", "narrow", 2), data: "2"},
{loc: "de", path: monthVal(s, "format", "narrow", 3), data: "M", altData: "BBB"},
{loc: "de", path: monthVal(s, "format", "narrow", 4), data: "A"},
{loc: "de", path: monthVal(s, "format", "narrow", 5), data: "m"},
{loc: "de", path: s + "monthContext[@type='stand-alone']/monthWidth[@type='wide']", field: "month", length: 5},
{loc: "de", path: monthVal(s, "stand-alone", "wide", 1), data: "11"},
{loc: "de", path: monthVal(s, "stand-alone", "wide", 2), data: "22"},
{loc: "de", path: monthVal(s, "stand-alone", "wide", 3), data: "Maerz", altData: "bbb"},
{loc: "de", path: monthVal(s, "stand-alone", "wide", 4), data: "april"},
{loc: "de", path: monthVal(s, "stand-alone", "wide", 5), data: "mai"},
{loc: "de", path: s + "monthContext[@type='stand-alone']/monthWidth[@type='narrow']", field: "month", length: 5},
{loc: "de", path: monthVal(s, "stand-alone", "narrow", 1), data: "1"},
{loc: "de", path: monthVal(s, "stand-alone", "narrow", 2), data: "2"},
{loc: "de", path: monthVal(s, "stand-alone", "narrow", 3), data: "m"},
{loc: "de", path: monthVal(s, "stand-alone", "narrow", 4), data: "4"},
{loc: "de", path: monthVal(s, "stand-alone", "narrow", 5), data: "m"},
}
if s == budMon {
for i, t := range tests {
if t.altData != "" {
tests[i].data = t.altData
}
}
}
return tests
}
var getTests = append(rootGetTests,
append(deGetTests,
append(calGetTests(greMon),
append(calGetTests(budMon),
calGetTests(chnMon)...)...)...)...)
func TestPath(t *testing.T) {
d := data()
for i, tt := range getTests {
x, _ := d.LDML(tt.loc)
e, err := walkXPath(x, tt.path)
if err != nil {
if !tt.missing {
t.Errorf("%d:error: %v %v", i, err, tt.missing)
}
continue
}
if tt.missing {
t.Errorf("%d: missing is %v; want %v", i, e == nil, tt.missing)
continue
}
if tt.data != "" && e.GetCommon().Data() != tt.data {
t.Errorf("%d: data is %v; want %v", i, e.GetCommon().Data(), tt.data)
continue
}
if tt.typ != "" && e.GetCommon().Type != tt.typ {
t.Errorf("%d: type is %v; want %v", i, e.GetCommon().Type, tt.typ)
continue
}
if tt.field != "" {
slice, _ := findField(reflect.ValueOf(e), tt.field)
if slice.Len() != tt.length {
t.Errorf("%d: length is %v; want %v", i, slice.Len(), tt.length)
continue
}
}
}
}
func TestGet(t *testing.T) {
d := data()
for i, tt := range getTests {
x, _ := d.LDML(tt.loc)
e, err := Get(x, tt.path)
if err != nil {
if !tt.missing {
t.Errorf("%d:error: %v %v", i, err, tt.missing)
}
continue
}
if tt.missing {
t.Errorf("%d: missing is %v; want %v", i, e == nil, tt.missing)
continue
}
if tt.data != "" && e.GetCommon().Data() != tt.data {
t.Errorf("%d: data is %v; want %v", i, e.GetCommon().Data(), tt.data)
continue
}
if tt.typ != "" && e.GetCommon().Type != tt.typ {
t.Errorf("%d: type is %v; want %v", i, e.GetCommon().Type, tt.typ)
continue
}
if tt.field != "" {
slice, _ := findField(reflect.ValueOf(e), tt.field)
if slice.Len() != tt.length {
t.Errorf("%d: length is %v; want %v", i, slice.Len(), tt.length)
continue
}
}
}
}

View file

@ -1,175 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cldr
import (
"reflect"
"testing"
)
type testSlice []*Common
func mkElem(alt, typ, ref string) *Common {
return &Common{
Type: typ,
Reference: ref,
Alt: alt,
}
}
var (
testSlice1 = testSlice{
mkElem("1", "a", "i.a"),
mkElem("1", "b", "i.b"),
mkElem("1", "c", "i.c"),
mkElem("2", "b", "ii"),
mkElem("3", "c", "iii"),
mkElem("4", "a", "iv.a"),
mkElem("4", "d", "iv.d"),
}
testSliceE = testSlice{}
)
func panics(f func()) (panics bool) {
defer func() {
if err := recover(); err != nil {
panics = true
}
}()
f()
return panics
}
func TestMakeSlice(t *testing.T) {
foo := 1
bar := []int{}
tests := []struct {
i interface{}
panics bool
err string
}{
{&foo, true, "should panic when passed a pointer to the wrong type"},
{&bar, true, "should panic when slice element of the wrong type"},
{testSlice1, true, "should panic when passed a slice"},
{&testSlice1, false, "should not panic"},
}
for i, tt := range tests {
if panics(func() { MakeSlice(tt.i) }) != tt.panics {
t.Errorf("%d: %s", i, tt.err)
}
}
}
var anyOfTests = []struct {
sl testSlice
values []string
n int
}{
{testSliceE, []string{}, 0},
{testSliceE, []string{"1", "2", "3"}, 0},
{testSlice1, []string{}, 0},
{testSlice1, []string{"1"}, 3},
{testSlice1, []string{"2"}, 1},
{testSlice1, []string{"5"}, 0},
{testSlice1, []string{"1", "2", "3"}, 5},
}
func TestSelectAnyOf(t *testing.T) {
for i, tt := range anyOfTests {
sl := tt.sl
s := MakeSlice(&sl)
s.SelectAnyOf("alt", tt.values...)
if len(sl) != tt.n {
t.Errorf("%d: found len == %d; want %d", i, len(sl), tt.n)
}
}
sl := testSlice1
s := MakeSlice(&sl)
if !panics(func() { s.SelectAnyOf("foo") }) {
t.Errorf("should panic on non-existing attribute")
}
}
func TestFilter(t *testing.T) {
for i, tt := range anyOfTests {
sl := tt.sl
s := MakeSlice(&sl)
s.Filter(func(e Elem) bool {
v, _ := findField(reflect.ValueOf(e), "alt")
return in(tt.values, v.String())
})
if len(sl) != tt.n {
t.Errorf("%d: found len == %d; want %d", i, len(sl), tt.n)
}
}
}
func TestGroup(t *testing.T) {
f := func(excl ...string) func(Elem) string {
return func(e Elem) string {
return Key(e, excl...)
}
}
tests := []struct {
sl testSlice
f func(Elem) string
lens []int
}{
{testSliceE, f(), []int{}},
{testSlice1, f(), []int{1, 1, 1, 1, 1, 1, 1}},
{testSlice1, f("type"), []int{3, 1, 1, 2}},
{testSlice1, f("alt"), []int{2, 2, 2, 1}},
{testSlice1, f("alt", "type"), []int{7}},
{testSlice1, f("alt", "type"), []int{7}},
}
for i, tt := range tests {
sl := tt.sl
s := MakeSlice(&sl)
g := s.Group(tt.f)
if len(tt.lens) != len(g) {
t.Errorf("%d: found %d; want %d", i, len(g), len(tt.lens))
continue
}
for j, v := range tt.lens {
if n := g[j].Value().Len(); n != v {
t.Errorf("%d: found %d for length of group %d; want %d", i, n, j, v)
}
}
}
}
func TestSelectOnePerGroup(t *testing.T) {
tests := []struct {
sl testSlice
attr string
values []string
refs []string
}{
{testSliceE, "alt", []string{"1"}, []string{}},
{testSliceE, "type", []string{"a"}, []string{}},
{testSlice1, "alt", []string{"2", "3", "1"}, []string{"i.a", "ii", "iii"}},
{testSlice1, "alt", []string{"1", "4"}, []string{"i.a", "i.b", "i.c", "iv.d"}},
{testSlice1, "type", []string{"c", "d"}, []string{"i.c", "iii", "iv.d"}},
}
for i, tt := range tests {
sl := tt.sl
s := MakeSlice(&sl)
s.SelectOnePerGroup(tt.attr, tt.values)
if len(sl) != len(tt.refs) {
t.Errorf("%d: found result length %d; want %d", i, len(sl), len(tt.refs))
continue
}
for j, e := range sl {
if tt.refs[j] != e.Reference {
t.Errorf("%d:%d found %s; want %s", i, j, e.Reference, tt.refs[i])
}
}
}
sl := testSlice1
s := MakeSlice(&sl)
if !panics(func() { s.SelectOnePerGroup("foo", nil) }) {
t.Errorf("should panic on non-existing attribute")
}
}

View file

@ -636,6 +636,13 @@ type SupplementalData struct {
Path string `xml:"path,attr"`
} `xml:"rgPath"`
} `xml:"rgScope"`
LanguageGroups *struct {
Common
LanguageGroup []*struct {
Common
Parent string `xml:"parent,attr"`
} `xml:"languageGroup"`
} `xml:"languageGroups"`
}
// LDML is the top-level type for locale-specific data.
@ -1484,4 +1491,4 @@ type Numbers struct {
}
// Version is the version of CLDR from which the XML definitions are generated.
const Version = "31"
const Version = "32"

View file

@ -1,130 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import "testing"
// TestCase is used for most tests.
type TestCase struct {
in []rune
out []rune
}
func runTests(t *testing.T, name string, fm Form, tests []TestCase) {
rb := reorderBuffer{}
rb.init(fm, nil)
for i, test := range tests {
rb.setFlusher(nil, appendFlush)
for j, rune := range test.in {
b := []byte(string(rune))
src := inputBytes(b)
info := rb.f.info(src, 0)
if j == 0 {
rb.ss.first(info)
} else {
rb.ss.next(info)
}
if rb.insertFlush(src, 0, info) < 0 {
t.Errorf("%s:%d: insert failed for rune %d", name, i, j)
}
}
rb.doFlush()
was := string(rb.out)
want := string(test.out)
if len(was) != len(want) {
t.Errorf("%s:%d: length = %d; want %d", name, i, len(was), len(want))
}
if was != want {
k, pfx := pidx(was, want)
t.Errorf("%s:%d: \nwas %s%+q; \nwant %s%+q", name, i, pfx, was[k:], pfx, want[k:])
}
}
}
func TestFlush(t *testing.T) {
const (
hello = "Hello "
world = "world!"
)
buf := make([]byte, maxByteBufferSize)
p := copy(buf, hello)
out := buf[p:]
rb := reorderBuffer{}
rb.initString(NFC, world)
if i := rb.flushCopy(out); i != 0 {
t.Errorf("wrote bytes on flush of empty buffer. (len(out) = %d)", i)
}
for i := range world {
// No need to set streamSafe values for this test.
rb.insertFlush(rb.src, i, rb.f.info(rb.src, i))
n := rb.flushCopy(out)
out = out[n:]
p += n
}
was := buf[:p]
want := hello + world
if string(was) != want {
t.Errorf(`output after flush was "%s"; want "%s"`, string(was), want)
}
if rb.nrune != 0 {
t.Errorf("non-null size of info buffer (rb.nrune == %d)", rb.nrune)
}
if rb.nbyte != 0 {
t.Errorf("non-null size of byte buffer (rb.nbyte == %d)", rb.nbyte)
}
}
var insertTests = []TestCase{
{[]rune{'a'}, []rune{'a'}},
{[]rune{0x300}, []rune{0x300}},
{[]rune{0x300, 0x316}, []rune{0x316, 0x300}}, // CCC(0x300)==230; CCC(0x316)==220
{[]rune{0x316, 0x300}, []rune{0x316, 0x300}},
{[]rune{0x41, 0x316, 0x300}, []rune{0x41, 0x316, 0x300}},
{[]rune{0x41, 0x300, 0x316}, []rune{0x41, 0x316, 0x300}},
{[]rune{0x300, 0x316, 0x41}, []rune{0x316, 0x300, 0x41}},
{[]rune{0x41, 0x300, 0x40, 0x316}, []rune{0x41, 0x300, 0x40, 0x316}},
}
func TestInsert(t *testing.T) {
runTests(t, "TestInsert", NFD, insertTests)
}
var decompositionNFDTest = []TestCase{
{[]rune{0xC0}, []rune{0x41, 0x300}},
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
{[]rune{0x01C4}, []rune{0x01C4}},
{[]rune{0x320E}, []rune{0x320E}},
{[]rune("음ẻ과"), []rune{0x110B, 0x1173, 0x11B7, 0x65, 0x309, 0x1100, 0x116A}},
}
var decompositionNFKDTest = []TestCase{
{[]rune{0xC0}, []rune{0x41, 0x300}},
{[]rune{0xAC00}, []rune{0x1100, 0x1161}},
{[]rune{0x01C4}, []rune{0x44, 0x5A, 0x030C}},
{[]rune{0x320E}, []rune{0x28, 0x1100, 0x1161, 0x29}},
}
func TestDecomposition(t *testing.T) {
runTests(t, "TestDecompositionNFD", NFD, decompositionNFDTest)
runTests(t, "TestDecompositionNFKD", NFKD, decompositionNFKDTest)
}
var compositionTest = []TestCase{
{[]rune{0x41, 0x300}, []rune{0xC0}},
{[]rune{0x41, 0x316}, []rune{0x41, 0x316}},
{[]rune{0x41, 0x300, 0x35D}, []rune{0xC0, 0x35D}},
{[]rune{0x41, 0x316, 0x300}, []rune{0xC0, 0x316}},
// blocking starter
{[]rune{0x41, 0x316, 0x40, 0x300}, []rune{0x41, 0x316, 0x40, 0x300}},
{[]rune{0x1100, 0x1161}, []rune{0xAC00}},
// parenthesized Hangul, alternate between ASCII and Hangul.
{[]rune{0x28, 0x1100, 0x1161, 0x29}, []rune{0x28, 0xAC00, 0x29}},
}
func TestComposition(t *testing.T) {
runTests(t, "TestComposition", NFC, compositionTest)
}

View file

@ -1,82 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm_test
import (
"bytes"
"fmt"
"unicode/utf8"
"golang.org/x/text/unicode/norm"
)
// EqualSimple uses a norm.Iter to compare two non-normalized
// strings for equivalence.
func EqualSimple(a, b string) bool {
var ia, ib norm.Iter
ia.InitString(norm.NFKD, a)
ib.InitString(norm.NFKD, b)
for !ia.Done() && !ib.Done() {
if !bytes.Equal(ia.Next(), ib.Next()) {
return false
}
}
return ia.Done() && ib.Done()
}
// FindPrefix finds the longest common prefix of ASCII characters
// of a and b.
func FindPrefix(a, b string) int {
i := 0
for ; i < len(a) && i < len(b) && a[i] < utf8.RuneSelf && a[i] == b[i]; i++ {
}
return i
}
// EqualOpt is like EqualSimple, but optimizes the special
// case for ASCII characters.
func EqualOpt(a, b string) bool {
n := FindPrefix(a, b)
a, b = a[n:], b[n:]
var ia, ib norm.Iter
ia.InitString(norm.NFKD, a)
ib.InitString(norm.NFKD, b)
for !ia.Done() && !ib.Done() {
if !bytes.Equal(ia.Next(), ib.Next()) {
return false
}
if n := int64(FindPrefix(a[ia.Pos():], b[ib.Pos():])); n != 0 {
ia.Seek(n, 1)
ib.Seek(n, 1)
}
}
return ia.Done() && ib.Done()
}
var compareTests = []struct{ a, b string }{
{"aaa", "aaa"},
{"aaa", "aab"},
{"a\u0300a", "\u00E0a"},
{"a\u0300\u0320b", "a\u0320\u0300b"},
{"\u1E0A\u0323", "\x44\u0323\u0307"},
// A character that decomposes into multiple segments
// spans several iterations.
{"\u3304", "\u30A4\u30CB\u30F3\u30AF\u3099"},
}
func ExampleIter() {
for i, t := range compareTests {
r0 := EqualSimple(t.a, t.b)
r1 := EqualOpt(t.a, t.b)
fmt.Printf("%d: %v %v\n", i, r0, r1)
}
// Output:
// 0: true true
// 1: false false
// 2: true true
// 3: true true
// 4: true true
// 5: true true
}

View file

@ -1,27 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm_test
import (
"fmt"
"golang.org/x/text/unicode/norm"
)
func ExampleForm_NextBoundary() {
s := norm.NFD.String("Mêlée")
for i := 0; i < len(s); {
d := norm.NFC.NextBoundaryInString(s[i:], true)
fmt.Printf("%[1]s: %+[1]q\n", s[i:i+d])
i += d
}
// Output:
// M: "M"
// ê: "e\u0302"
// l: "l"
// é: "e\u0301"
// e: "e"
}

View file

@ -1,54 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build test
package norm
import "testing"
func TestProperties(t *testing.T) {
var d runeData
CK := [2]string{"C", "K"}
for k, r := 1, rune(0); r < 0x2ffff; r++ {
if k < len(testData) && r == testData[k].r {
d = testData[k]
k++
}
s := string(r)
for j, p := range []Properties{NFC.PropertiesString(s), NFKC.PropertiesString(s)} {
f := d.f[j]
if p.CCC() != d.ccc {
t.Errorf("%U: ccc(%s): was %d; want %d %X", r, CK[j], p.CCC(), d.ccc, p.index)
}
if p.isYesC() != (f.qc == Yes) {
t.Errorf("%U: YesC(%s): was %v; want %v", r, CK[j], p.isYesC(), f.qc == Yes)
}
if p.combinesBackward() != (f.qc == Maybe) {
t.Errorf("%U: combines backwards(%s): was %v; want %v", r, CK[j], p.combinesBackward(), f.qc == Maybe)
}
if p.nLeadingNonStarters() != d.nLead {
t.Errorf("%U: nLead(%s): was %d; want %d %#v %#v", r, CK[j], p.nLeadingNonStarters(), d.nLead, p, d)
}
if p.nTrailingNonStarters() != d.nTrail {
t.Errorf("%U: nTrail(%s): was %d; want %d %#v %#v", r, CK[j], p.nTrailingNonStarters(), d.nTrail, p, d)
}
if p.combinesForward() != f.combinesForward {
t.Errorf("%U: combines forward(%s): was %v; want %v %#v", r, CK[j], p.combinesForward(), f.combinesForward, p)
}
// Skip Hangul as it is algorithmically computed.
if r >= hangulBase && r < hangulEnd {
continue
}
if p.hasDecomposition() {
if has := f.decomposition != ""; !has {
t.Errorf("%U: hasDecomposition(%s): was %v; want %v", r, CK[j], p.hasDecomposition(), has)
}
if string(p.Decomposition()) != f.decomposition {
t.Errorf("%U: decomp(%s): was %+q; want %+q", r, CK[j], p.Decomposition(), f.decomposition)
}
}
}
}
}

View file

@ -1,98 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"strings"
"testing"
)
func doIterNorm(f Form, s string) []byte {
acc := []byte{}
i := Iter{}
i.InitString(f, s)
for !i.Done() {
acc = append(acc, i.Next()...)
}
return acc
}
func TestIterNext(t *testing.T) {
runNormTests(t, "IterNext", func(f Form, out []byte, s string) []byte {
return doIterNorm(f, string(append(out, s...)))
})
}
type SegmentTest struct {
in string
out []string
}
var segmentTests = []SegmentTest{
{"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}},
{rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")},
{rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")},
{rep('a', segSize) + "\u0300aa",
append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")},
// U+0f73 is NOT treated as a starter as it is a modifier
{"a" + grave(29) + "\u0f73", []string{"a" + grave(29), cgj + "\u0f73"}},
{"a\u0f73", []string{"a\u0f73"}},
// U+ff9e is treated as a non-starter.
// TODO: should we? Note that this will only affect iteration, as whether
// or not we do so does not affect the normalization output and will either
// way result in consistent iteration output.
{"a" + grave(30) + "\uff9e", []string{"a" + grave(30), cgj + "\uff9e"}},
{"a\uff9e", []string{"a\uff9e"}},
}
var segmentTestsK = []SegmentTest{
{"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}},
// last segment of multi-segment decomposition needs normalization
{"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}},
{"\u320E", []string{"\x28", "\uAC00", "\x29"}},
// last segment should be copied to start of buffer.
{"\ufdfa", []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645", ""}},
{"\ufdfa" + grave(30), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), ""}},
{"\uFDFA" + grave(64), []string{"\u0635", "\u0644", "\u0649", " ", "\u0627", "\u0644", "\u0644", "\u0647", " ", "\u0639", "\u0644", "\u064a", "\u0647", " ", "\u0648", "\u0633", "\u0644", "\u0645" + grave(30), cgj + grave(30), cgj + grave(4), ""}},
// Hangul and Jamo are grouped together.
{"\uAC00", []string{"\u1100\u1161", ""}},
{"\uAC01", []string{"\u1100\u1161\u11A8", ""}},
{"\u1100\u1161", []string{"\u1100\u1161", ""}},
}
// Note that, by design, segmentation is equal for composing and decomposing forms.
func TestIterSegmentation(t *testing.T) {
segmentTest(t, "SegmentTestD", NFD, segmentTests)
segmentTest(t, "SegmentTestC", NFC, segmentTests)
segmentTest(t, "SegmentTestKD", NFKD, segmentTestsK)
segmentTest(t, "SegmentTestKC", NFKC, segmentTestsK)
}
func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
iter := Iter{}
for i, tt := range tests {
iter.InitString(f, tt.in)
for j, seg := range tt.out {
if seg == "" {
if !iter.Done() {
res := string(iter.Next())
t.Errorf(`%s:%d:%d: expected Done()==true, found segment %+q`, name, i, j, res)
}
continue
}
if iter.Done() {
t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
}
seg = f.String(seg)
if res := string(iter.Next()); res != seg {
t.Errorf(`%s:%d:%d" segment was %+q (%d); want %+q (%d)`, name, i, j, pc(res), len(res), pc(seg), len(seg))
}
}
}
}

View file

@ -795,7 +795,7 @@ func makeTables() {
}
fmt.Fprintf(w, "// Total size of tables: %dKB (%d bytes)\n", (size+512)/1024, size)
gen.WriteGoFile("tables.go", "norm", w.Bytes())
gen.WriteVersionedGoFile("tables.go", "norm", w.Bytes())
}
func printChars() {
@ -972,5 +972,5 @@ func printTestdata() {
}
}
fmt.Fprintln(w, "}")
gen.WriteGoFile("data_test.go", "norm", w.Bytes())
gen.WriteVersionedGoFile("data_test.go", "norm", w.Bytes())
}

File diff suppressed because it is too large Load diff

View file

@ -1,56 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"bytes"
"fmt"
"testing"
)
var bufSizes = []int{1, 2, 3, 4, 5, 6, 7, 8, 100, 101, 102, 103, 4000, 4001, 4002, 4003}
func readFunc(size int) appendFunc {
return func(f Form, out []byte, s string) []byte {
out = append(out, s...)
r := f.Reader(bytes.NewBuffer(out))
buf := make([]byte, size)
result := []byte{}
for n, err := 0, error(nil); err == nil; {
n, err = r.Read(buf)
result = append(result, buf[:n]...)
}
return result
}
}
func TestReader(t *testing.T) {
for _, s := range bufSizes {
name := fmt.Sprintf("TestReader%d", s)
runNormTests(t, name, readFunc(s))
}
}
func writeFunc(size int) appendFunc {
return func(f Form, out []byte, s string) []byte {
in := append(out, s...)
result := new(bytes.Buffer)
w := f.Writer(result)
buf := make([]byte, size)
for n := 0; len(in) > 0; in = in[n:] {
n = copy(buf, in)
_, _ = w.Write(buf[:n])
}
w.Close()
return result.Bytes()
}
}
func TestWriter(t *testing.T) {
for _, s := range bufSizes {
name := fmt.Sprintf("TestWriter%d", s)
runNormTests(t, name, writeFunc(s))
}
}

View file

@ -1,5 +1,7 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// +build go1.10
package norm
const (

7633
vendor/golang.org/x/text/unicode/norm/tables9.0.0.go generated vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,101 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"fmt"
"testing"
"golang.org/x/text/transform"
)
func TestTransform(t *testing.T) {
tests := []struct {
f Form
in, out string
eof bool
dstSize int
err error
}{
{NFC, "ab", "ab", true, 2, nil},
{NFC, "qx", "qx", true, 2, nil},
{NFD, "qx", "qx", true, 2, nil},
{NFC, "", "", true, 1, nil},
{NFD, "", "", true, 1, nil},
{NFC, "", "", false, 1, nil},
{NFD, "", "", false, 1, nil},
// Normalized segment does not fit in destination.
{NFD, "ö", "", true, 1, transform.ErrShortDst},
{NFD, "ö", "", true, 2, transform.ErrShortDst},
// As an artifact of the algorithm, only full segments are written.
// This is not strictly required, and some bytes could be written.
// In practice, for Transform to not block, the destination buffer
// should be at least MaxSegmentSize to work anyway and these edge
// conditions will be relatively rare.
{NFC, "ab", "", true, 1, transform.ErrShortDst},
// This is even true for inert runes.
{NFC, "qx", "", true, 1, transform.ErrShortDst},
{NFC, "a\u0300abc", "\u00e0a", true, 4, transform.ErrShortDst},
// We cannot write a segment if successive runes could still change the result.
{NFD, "ö", "", false, 3, transform.ErrShortSrc},
{NFC, "a\u0300", "", false, 4, transform.ErrShortSrc},
{NFD, "a\u0300", "", false, 4, transform.ErrShortSrc},
{NFC, "ö", "", false, 3, transform.ErrShortSrc},
{NFC, "a\u0300", "", true, 1, transform.ErrShortDst},
// Theoretically could fit, but won't due to simplified checks.
{NFC, "a\u0300", "", true, 2, transform.ErrShortDst},
{NFC, "a\u0300", "", true, 3, transform.ErrShortDst},
{NFC, "a\u0300", "\u00e0", true, 4, nil},
{NFD, "öa\u0300", "o\u0308", false, 8, transform.ErrShortSrc},
{NFD, "öa\u0300ö", "o\u0308a\u0300", true, 8, transform.ErrShortDst},
{NFD, "öa\u0300ö", "o\u0308a\u0300", false, 12, transform.ErrShortSrc},
// Illegal input is copied verbatim.
{NFD, "\xbd\xb2=\xbc ", "\xbd\xb2=\xbc ", true, 8, nil},
}
b := make([]byte, 100)
for i, tt := range tests {
nDst, _, err := tt.f.Transform(b[:tt.dstSize], []byte(tt.in), tt.eof)
out := string(b[:nDst])
if out != tt.out || err != tt.err {
t.Errorf("%d: was %+q (%v); want %+q (%v)", i, out, err, tt.out, tt.err)
}
if want := tt.f.String(tt.in)[:nDst]; want != out {
t.Errorf("%d: incorrect normalization: was %+q; want %+q", i, out, want)
}
}
}
var transBufSizes = []int{
MaxTransformChunkSize,
3 * MaxTransformChunkSize / 2,
2 * MaxTransformChunkSize,
3 * MaxTransformChunkSize,
100 * MaxTransformChunkSize,
}
func doTransNorm(f Form, buf []byte, b []byte) []byte {
acc := []byte{}
for p := 0; p < len(b); {
nd, ns, _ := f.Transform(buf[:], b[p:], true)
p += ns
acc = append(acc, buf[:nd]...)
}
return acc
}
func TestTransformNorm(t *testing.T) {
for _, sz := range transBufSizes {
buf := make([]byte, sz)
runNormTests(t, fmt.Sprintf("Transform:%d", sz), func(f Form, out []byte, s string) []byte {
return doTransNorm(f, buf, append(out, s...))
})
}
}

View file

@ -1,275 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm
import (
"bufio"
"bytes"
"fmt"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
"testing"
"time"
"unicode/utf8"
"golang.org/x/text/internal/gen"
"golang.org/x/text/internal/testtext"
)
var once sync.Once
func skipShort(t *testing.T) {
testtext.SkipIfNotLong(t)
once.Do(func() { loadTestData(t) })
}
// This regression test runs the test set in NormalizationTest.txt
// (taken from http://www.unicode.org/Public/<unicode.Version>/ucd/).
//
// NormalizationTest.txt has form:
// @Part0 # Specific cases
// #
// 1E0A;1E0A;0044 0307;1E0A;0044 0307; # (Ḋ; Ḋ; D◌̇; Ḋ; D◌̇; ) LATIN CAPITAL LETTER D WITH DOT ABOVE
// 1E0C;1E0C;0044 0323;1E0C;0044 0323; # (Ḍ; Ḍ; D◌̣; Ḍ; D◌̣; ) LATIN CAPITAL LETTER D WITH DOT BELOW
//
// Each test has 5 columns (c1, c2, c3, c4, c5), where
// (c1, c2, c3, c4, c5) == (c1, NFC(c1), NFD(c1), NFKC(c1), NFKD(c1))
//
// CONFORMANCE:
// 1. The following invariants must be true for all conformant implementations
//
// NFC
// c2 == NFC(c1) == NFC(c2) == NFC(c3)
// c4 == NFC(c4) == NFC(c5)
//
// NFD
// c3 == NFD(c1) == NFD(c2) == NFD(c3)
// c5 == NFD(c4) == NFD(c5)
//
// NFKC
// c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5)
//
// NFKD
// c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5)
//
// 2. For every code point X assigned in this version of Unicode that is not
// specifically listed in Part 1, the following invariants must be true
// for all conformant implementations:
//
// X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X)
//
// Column types.
const (
cRaw = iota
cNFC
cNFD
cNFKC
cNFKD
cMaxColumns
)
// Holds data from NormalizationTest.txt
var part []Part
type Part struct {
name string
number int
tests []Test
}
type Test struct {
name string
partnr int
number int
r rune // used for character by character test
cols [cMaxColumns]string // Each has 5 entries, see below.
}
func (t Test) Name() string {
if t.number < 0 {
return part[t.partnr].name
}
return fmt.Sprintf("%s:%d", part[t.partnr].name, t.number)
}
var partRe = regexp.MustCompile(`@Part(\d) # (.*)$`)
var testRe = regexp.MustCompile(`^` + strings.Repeat(`([\dA-F ]+);`, 5) + ` # (.*)$`)
var counter int
// Load the data form NormalizationTest.txt
func loadTestData(t *testing.T) {
f := gen.OpenUCDFile("NormalizationTest.txt")
defer f.Close()
scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := scanner.Text()
if len(line) == 0 || line[0] == '#' {
continue
}
m := partRe.FindStringSubmatch(line)
if m != nil {
if len(m) < 3 {
t.Fatal("Failed to parse Part: ", line)
}
i, err := strconv.Atoi(m[1])
if err != nil {
t.Fatal(err)
}
name := m[2]
part = append(part, Part{name: name[:len(name)-1], number: i})
continue
}
m = testRe.FindStringSubmatch(line)
if m == nil || len(m) < 7 {
t.Fatalf(`Failed to parse: "%s" result: %#v`, line, m)
}
test := Test{name: m[6], partnr: len(part) - 1, number: counter}
counter++
for j := 1; j < len(m)-1; j++ {
for _, split := range strings.Split(m[j], " ") {
r, err := strconv.ParseUint(split, 16, 64)
if err != nil {
t.Fatal(err)
}
if test.r == 0 {
// save for CharacterByCharacterTests
test.r = rune(r)
}
var buf [utf8.UTFMax]byte
sz := utf8.EncodeRune(buf[:], rune(r))
test.cols[j-1] += string(buf[:sz])
}
}
part := &part[len(part)-1]
part.tests = append(part.tests, test)
}
if scanner.Err() != nil {
t.Fatal(scanner.Err())
}
}
// cmpResult reports a test error when the normalization result does not
// match the expected (golden) string.
func cmpResult(t *testing.T, tc *Test, name string, f Form, gold, test, result string) {
	if gold == result {
		return
	}
	t.Errorf("%s:%s: %s(%+q)=%+q; want %+q: %s",
		tc.Name(), name, fstr[f], test, result, gold, tc.name)
}
// cmpIsNormal reports a test error when an IsNormal-style predicate returns
// the wrong boolean for the given input.
func cmpIsNormal(t *testing.T, tc *Test, name string, f Form, test string, result, want bool) {
	if result == want {
		return
	}
	t.Errorf("%s:%s: %s(%+q)=%v; want %v", tc.Name(), name, fstr[f], test, result, want)
}
// doTest checks a single normalization case: applying form f to test must
// yield gold. The conversion is exercised through every public entry point
// of the package: Bytes, String, Iter, Transform and Append, as well as the
// IsNormal/IsNormalString predicates.
func doTest(t *testing.T, tc *Test, f Form, gold, test string) {
	testb := []byte(test)
	result := f.Bytes(testb)
	cmpResult(t, tc, "Bytes", f, gold, test, string(result))
	sresult := f.String(test)
	cmpResult(t, tc, "String", f, gold, test, sresult)
	// Accumulate the output of the iterator segment by segment.
	acc := []byte{}
	i := Iter{}
	i.InitString(f, test)
	for !i.Done() {
		acc = append(acc, i.Next()...)
	}
	cmpResult(t, tc, "Iter.Next", f, gold, test, string(acc))
	// Drive the Transformer with a small fixed destination buffer so the
	// input is consumed in chunks; atEOF is true on every call and the
	// returned error is deliberately ignored (progress is via nDst/nSrc).
	buf := make([]byte, 128)
	acc = nil
	for p := 0; p < len(testb); {
		nDst, nSrc, _ := f.Transform(buf, testb[p:], true)
		acc = append(acc, buf[:nDst]...)
		p += nSrc
	}
	cmpResult(t, tc, "Transform", f, gold, test, string(acc))
	// Append the tail to an already-normalized head, splitting the input at
	// every rune boundary.
	for i := range test {
		out := f.Append(f.Bytes([]byte(test[:i])), []byte(test[i:])...)
		cmpResult(t, tc, fmt.Sprintf(":Append:%d", i), f, gold, test, string(out))
	}
	cmpIsNormal(t, tc, "IsNormal", f, test, f.IsNormal([]byte(test)), test == gold)
	cmpIsNormal(t, tc, "IsNormalString", f, test, f.IsNormalString(test), test == gold)
}
// doConformanceTests runs doTest for every form/column pairing mandated by
// the NormalizationTest.txt conformance requirements: columns 0-2 must
// normalize to the columns of their respective forms, and columns 3-4 use
// the compatibility columns as the expected result for all forms.
// partn is currently unused but kept for call-site compatibility.
func doConformanceTests(t *testing.T, tc *Test, partn int) {
	type formGold struct {
		f    Form
		gold int // column index holding the expected result
	}
	lower := []formGold{{NFC, 1}, {NFD, 2}, {NFKC, 3}, {NFKD, 4}}
	for col := 0; col <= 2; col++ {
		for _, fg := range lower {
			doTest(t, tc, fg.f, tc.cols[fg.gold], tc.cols[col])
		}
	}
	upper := []formGold{{NFC, 3}, {NFD, 4}, {NFKC, 3}, {NFKD, 4}}
	for col := 3; col <= 4; col++ {
		for _, fg := range upper {
			doTest(t, tc, fg.f, tc.cols[fg.gold], tc.cols[col])
		}
	}
}
// TestCharacterByCharacter runs the part 1 conformance tests and, for every
// rune NOT explicitly listed there, verifies the invariant
// X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X) required for unlisted code
// points.
func TestCharacterByCharacter(t *testing.T) {
	skipShort(t)
	tests := part[1].tests
	var last rune = 0
	for i := 0; i <= len(tests); i++ { // last one is special case
		var r rune
		if i == len(tests) {
			// Upper bound for the trailing gap after the last listed rune.
			r = 0x2FA1E // Don't have to go to 0x10FFFF
		} else {
			r = tests[i].r
		}
		// Sweep the gap between the previously listed rune and r.
		for last++; last < r; last++ {
			// Check all characters that were not explicitly listed in the test.
			tc := &Test{partnr: 1, number: -1}
			char := string(last)
			doTest(t, tc, NFC, char, char)
			doTest(t, tc, NFD, char, char)
			doTest(t, tc, NFKC, char, char)
			doTest(t, tc, NFKD, char, char)
		}
		if i < len(tests) {
			doConformanceTests(t, &tests[i], 1)
		}
	}
}
// TestStandardTests runs the conformance tests of parts 0, 2 and 3;
// part 1 is exercised separately by TestCharacterByCharacter.
func TestStandardTests(t *testing.T) {
	skipShort(t)
	for _, partIdx := range []int{0, 2, 3} {
		tests := part[partIdx].tests
		for i := range tests {
			doConformanceTests(t, &tests[i], partIdx)
		}
	}
}
// TestPerformance verifies that normalization is O(n). If any of the
// code does not properly check for maxCombiningChars, normalization
// may exhibit O(n**2) behavior and miss the one-second deadline below.
func TestPerformance(t *testing.T) {
	skipShort(t)
	runtime.GOMAXPROCS(2)
	// Buffered so the worker can finish (and be collected) even if the
	// deadline fires first.
	done := make(chan bool, 1)
	go func() {
		// One megabyte of a single combining character, plus one more
		// combining character appended at the end.
		input := bytes.Repeat([]byte("\u035D"), 1024*1024)
		input = append(input, "\u035B"...)
		NFC.Append(nil, input...)
		done <- true
	}()
	select {
	case <-done:
		// test completed before the timeout
	case <-time.After(1 * time.Second):
		t.Errorf(`unexpectedly long time to complete PerformanceTest`)
	}
}

View file

@ -95,7 +95,7 @@ func main() {
fmt.Fprintf(w, "// Total size %d bytes (%d KiB)\n", size, size/1024)
gen.WriteGoFile("tables.go", "rangetable", w.Bytes())
gen.WriteVersionedGoFile("tables.go", "rangetable", w.Bytes())
}
func print(w io.Writer, rt *unicode.RangeTable) {

View file

@ -1,184 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package rangetable
import (
"testing"
"unicode"
)
var (
	// maxRuneTable contains only unicode.MaxRune, the highest valid rune.
	maxRuneTable = &unicode.RangeTable{
		R32: []unicode.Range32{
			{unicode.MaxRune, unicode.MaxRune, 1},
		},
	}

	// overlap1 and overlap2 are hypothetical interleaved tables whose
	// ranges overlap in both R16 and R32; used to stress merging.
	overlap1 = &unicode.RangeTable{
		R16: []unicode.Range16{
			{0x100, 0xfffc, 4},
		},
		R32: []unicode.Range32{
			{0x100000, 0x10fffc, 4},
		},
	}

	overlap2 = &unicode.RangeTable{
		R16: []unicode.Range16{
			{0x101, 0xfffd, 4},
		},
		R32: []unicode.Range32{
			{0x100001, 0x10fffd, 3},
		},
	}

	// The following table should be compacted into two entries for R16 and R32.
	optimize = &unicode.RangeTable{
		R16: []unicode.Range16{
			{0x1, 0x1, 1},
			{0x2, 0x2, 1},
			{0x3, 0x3, 1},
			{0x5, 0x5, 1},
			{0x7, 0x7, 1},
			{0x9, 0x9, 1},
			{0xb, 0xf, 2},
		},
		R32: []unicode.Range32{
			{0x10001, 0x10001, 1},
			{0x10002, 0x10002, 1},
			{0x10003, 0x10003, 1},
			{0x10005, 0x10005, 1},
			{0x10007, 0x10007, 1},
			{0x10009, 0x10009, 1},
			{0x1000b, 0x1000f, 2},
		},
	}
)
// TestMerge checks, for a variety of table combinations, that membership in
// the merged table matches membership in the union of the inputs for every
// rune, and that the resulting ranges are well-formed, sorted,
// non-overlapping and maximally coalesced.
func TestMerge(t *testing.T) {
	for i, tt := range [][]*unicode.RangeTable{
		{unicode.Cc, unicode.Cf},
		{unicode.L, unicode.Ll},
		{unicode.L, unicode.Ll, unicode.Lu},
		{unicode.Ll, unicode.Lu},
		{unicode.M},
		unicode.GraphicRanges,
		cased,

		// Merge R16 only and R32 only and vice versa.
		{unicode.Khmer, unicode.Khudawadi},
		{unicode.Imperial_Aramaic, unicode.Radical},

		// Merge with empty.
		{&unicode.RangeTable{}},
		{&unicode.RangeTable{}, &unicode.RangeTable{}},
		{&unicode.RangeTable{}, &unicode.RangeTable{}, &unicode.RangeTable{}},
		{&unicode.RangeTable{}, unicode.Hiragana},
		{unicode.Inherited, &unicode.RangeTable{}},
		{&unicode.RangeTable{}, unicode.Hanunoo, &unicode.RangeTable{}},

		// Hypothetical tables.
		{maxRuneTable},
		{overlap1, overlap2},

		// Optimization
		{optimize},
	} {
		rt := Merge(tt...)
		// Semantic equivalence over the full rune space.
		for r := rune(0); r <= unicode.MaxRune; r++ {
			if got, want := unicode.Is(rt, r), unicode.In(r, tt...); got != want {
				t.Fatalf("%d:%U: got %v; want %v", i, r, got, want)
			}
		}
		// Test optimization and correctness for R16.
		for k := 0; k < len(rt.R16)-1; k++ {
			if lo, hi := rt.R16[k].Lo, rt.R16[k].Hi; lo > hi {
				t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
			}
			if hi, lo := rt.R16[k].Hi, rt.R16[k+1].Lo; hi >= lo {
				t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
			}
			// A next range starting exactly one stride after Hi indicates
			// ranges that should have been coalesced into one.
			// NOTE: %X used for both operands for consistency with the R32
			// message below (was %X and %x).
			if rt.R16[k].Hi+rt.R16[k].Stride == rt.R16[k+1].Lo {
				t.Errorf("%d: missed optimization for R16 at %d between %X and %X",
					i, k, rt.R16[k], rt.R16[k+1])
			}
		}
		// Test optimization and correctness for R32.
		for k := 0; k < len(rt.R32)-1; k++ {
			if lo, hi := rt.R32[k].Lo, rt.R32[k].Hi; lo > hi {
				t.Errorf("%d: Lo (%x) > Hi (%x)", i, lo, hi)
			}
			if hi, lo := rt.R32[k].Hi, rt.R32[k+1].Lo; hi >= lo {
				t.Errorf("%d: Hi (%x) >= next Lo (%x)", i, hi, lo)
			}
			if rt.R32[k].Hi+rt.R32[k].Stride == rt.R32[k+1].Lo {
				t.Errorf("%d: missed optimization for R32 at %d between %X and %X",
					i, k, rt.R32[k], rt.R32[k+1])
			}
		}
	}
}
// runes is the benchmark sample input: ASCII text, punctuation, and one
// supplementary-plane rune (U+10FFFD).
const runes = "Hello World in 2015!,\U0010fffd"
// BenchmarkNotMerged measures membership lookups against the unmerged
// GraphicRanges tables via unicode.In.
func BenchmarkNotMerged(t *testing.B) {
	for n := 0; n < t.N; n++ {
		for _, c := range runes {
			unicode.In(c, unicode.GraphicRanges...)
		}
	}
}
// BenchmarkMerged measures the same lookups against a single pre-merged
// table via unicode.Is; merging cost is excluded from the timed loop.
func BenchmarkMerged(t *testing.B) {
	merged := Merge(unicode.GraphicRanges...)
	for n := 0; n < t.N; n++ {
		for _, c := range runes {
			unicode.Is(merged, c)
		}
	}
}
// cased lists the case-related tables merged and queried by the *Cased
// benchmarks and by TestMerge.
var cased = []*unicode.RangeTable{
	unicode.Lower,
	unicode.Upper,
	unicode.Title,
	unicode.Other_Lowercase,
	unicode.Other_Uppercase,
}
// BenchmarkNotMergedCased measures lookups against the separate cased
// tables via unicode.In.
func BenchmarkNotMergedCased(t *testing.B) {
	for n := 0; n < t.N; n++ {
		for _, c := range runes {
			unicode.In(c, cased...)
		}
	}
}
// BenchmarkMergedCased measures lookups against the pre-merged cased tables
// via unicode.Is.
func BenchmarkMergedCased(t *testing.B) {
	// This reduces len(R16) from 243 to 82 and len(R32) from 65 to 35 for
	// Unicode 7.0.0.
	merged := Merge(cased...)
	for n := 0; n < t.N; n++ {
		for _, c := range runes {
			unicode.Is(merged, c)
		}
	}
}
// BenchmarkInit measures the one-time cost of building the merged tables
// used by the lookup benchmarks above.
func BenchmarkInit(t *testing.B) {
	for n := 0; n < t.N; n++ {
		Merge(cased...)
		Merge(unicode.GraphicRanges...)
	}
}
// BenchmarkInit2 measures merging of the two heavily overlapping
// hypothetical tables.
func BenchmarkInit2(t *testing.B) {
	// Hypothetical near-worst-case performance.
	for n := 0; n < t.N; n++ {
		Merge(overlap1, overlap2)
	}
}

View file

@ -1,55 +0,0 @@
package rangetable
import (
"reflect"
"testing"
"unicode"
)
var (
	// empty contains no ranges at all.
	empty = &unicode.RangeTable{}
	// many spans both the 16-bit range list (stride 5 from 0) and the
	// 32-bit list (stride 5 from 0x10004 to 0x10009).
	many = &unicode.RangeTable{
		R16: []unicode.Range16{{0, 0xffff, 5}},
		R32: []unicode.Range32{{0x10004, 0x10009, 5}},
		LatinOffset: 0,
	}
)
// TestVisit checks that Visit never calls the visitor for an empty table,
// and that for the many table it produces exactly the runes 0, 5, 10, ...
// up to and including 0x10009, in order.
func TestVisit(t *testing.T) {
	Visit(empty, func(r rune) {
		t.Error("call from empty RangeTable")
	})
	expect := rune(0)
	Visit(many, func(r rune) {
		if r != expect {
			t.Errorf("got %U; want %U", r, expect)
		}
		expect += 5
	})
	// Step back to the last rune actually visited and confirm it.
	expect -= 5
	if expect != 0x10009 {
		t.Errorf("last run was %U; want U+10009", expect)
	}
}
// TestNew round-trips several tables: the runes enumerated by Visit are fed
// to New, and the resulting table must enumerate exactly the same runes.
func TestNew(t *testing.T) {
	tables := []*unicode.RangeTable{
		empty,
		unicode.Co,
		unicode.Letter,
		unicode.ASCII_Hex_Digit,
		many,
		maxRuneTable,
	}
	for i, rt := range tables {
		var original []rune
		Visit(rt, func(r rune) {
			original = append(original, r)
		})
		var rebuilt []rune
		Visit(New(original...), func(r rune) {
			rebuilt = append(rebuilt, r)
		})
		if !reflect.DeepEqual(rebuilt, original) {
			t.Errorf("%d:\ngot %v;\nwant %v", i, rebuilt, original)
		}
	}
}

View file

@ -1,5 +1,7 @@
// Code generated by running "go generate" in golang.org/x/text. DO NOT EDIT.
// +build go1.10
package rangetable
//go:generate go run gen.go --versions=4.1.0,5.1.0,5.2.0,5.0.0,6.1.0,6.2.0,6.3.0,6.0.0,7.0.0,8.0.0,9.0.0,10.0.0

File diff suppressed because it is too large Load diff