Replace godep with dep

This commit is contained in:
Manuel de Brito Fontes 2017-10-06 17:26:14 -03:00
parent 1e7489927c
commit bf5616c65b
14883 changed files with 3937406 additions and 361781 deletions

View file

@ -5,7 +5,7 @@
//go:generate go run gen.go gen_trieval.go
// Package cases provides general and language-specific case mappers.
package cases
package cases // import "golang.org/x/text/cases"
import (
"golang.org/x/text/language"

438
vendor/golang.org/x/text/cases/context_test.go generated vendored Normal file
View file

@ -0,0 +1,438 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
import (
"strings"
"testing"
"unicode"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/language"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/unicode/rangetable"
)
// The following definitions are taken directly from Chapter 3 of The Unicode
// Standard.
// propCased reports whether r counts as cased: it satisfies propLower or
// propUpper, or it is a title case letter.
func propCased(r rune) bool {
	switch {
	case propLower(r), propUpper(r), unicode.IsTitle(r):
		return true
	}
	return false
}
// propLower reports whether r is lowercase, either according to
// unicode.IsLower or because it is listed in the Other_Lowercase table.
func propLower(r rune) bool {
	if unicode.IsLower(r) {
		return true
	}
	return unicode.Is(unicode.Other_Lowercase, r)
}
// propUpper reports whether r is uppercase, either according to
// unicode.IsUpper or because it is listed in the Other_Uppercase table.
func propUpper(r rune) bool {
	if unicode.IsUpper(r) {
		return true
	}
	return unicode.Is(unicode.Other_Uppercase, r)
}
// propIgnore reports whether r is case-ignorable: it belongs to one of the
// general categories Mn, Me, Cf, Lm or Sk, or it has an entry in the
// caseIgnorable table (defined elsewhere in the package).
func propIgnore(r rune) bool {
	return unicode.In(r, unicode.Mn, unicode.Me, unicode.Cf, unicode.Lm, unicode.Sk) || caseIgnorable[r]
}
// hasBreakProp reports whether r falls inside one of the [lo, hi] ranges of
// breakProp. It uses a binary search, so it presumably relies on breakProp
// being sorted and non-overlapping (confirm against the table definition).
func hasBreakProp(r rune) bool {
	// Search window is the half-open index interval [first, end).
	first, end := 0, len(breakProp)
	for first < end {
		mid := first + (end-first)/2
		entry := &breakProp[mid]
		switch {
		case entry.lo <= r && r <= entry.hi:
			return true
		case r < entry.lo:
			end = mid
		default:
			first = mid + 1
		}
	}
	return false
}
// contextFromRune returns a context whose source is the UTF-8 encoding of r,
// with a 128-byte destination buffer and atEOF set, after one call to next.
func contextFromRune(r rune) *context {
	ctx := &context{
		dst:   make([]byte, 128),
		src:   []byte(string(r)),
		atEOF: true,
	}
	ctx.next()
	return ctx
}
// TestCaseProperties compares, for every rune up to lastRuneForTesting that is
// assigned in both the package's and the core library's Unicode version, the
// properties reported through a context against the reference predicates
// propIgnore, propCased, propUpper, propLower and hasBreakProp.
func TestCaseProperties(t *testing.T) {
	if unicode.Version != UnicodeVersion {
		// Properties of existing code points may change by Unicode version, so
		// we need to skip.
		t.Skipf("Skipping as core Unicode version %s different than %s", unicode.Version, UnicodeVersion)
	}
	pkgAssigned := rangetable.Assigned(UnicodeVersion)
	coreAssigned := rangetable.Assigned(unicode.Version)
	for r := rune(0); r <= lastRuneForTesting; r++ {
		if !(unicode.In(r, pkgAssigned) && unicode.In(r, coreAssigned)) {
			continue
		}
		ctx := contextFromRune(r)

		gotIgnorable := ctx.info.isCaseIgnorable()
		if wantIgnorable := propIgnore(r); gotIgnorable != wantIgnorable {
			t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, gotIgnorable, wantIgnorable, ctx.info)
		}

		// New letters may change case types, but existing case pairings should
		// not change. See Case Pair Stability in
		// http://unicode.org/policies/stability_policy.html.
		folded := unicode.SimpleFold(r)
		if folded != r && unicode.In(folded, pkgAssigned) {
			gotCased := ctx.info.isCased()
			if wantCased := propCased(r); gotCased != wantCased {
				t.Errorf("cased(%U): got %v; want %v (%x)", r, gotCased, wantCased, ctx.info)
			}
			gotUpper := ctx.caseType() == cUpper
			if wantUpper := propUpper(r); gotUpper != wantUpper {
				t.Errorf("upper(%U): got %v; want %v (%x)", r, gotUpper, wantUpper, ctx.info)
			}
			gotLower := ctx.caseType() == cLower
			if wantLower := propLower(r); gotLower != wantLower {
				t.Errorf("lower(%U): got %v; want %v (%x)", r, gotLower, wantLower, ctx.info)
			}
		}

		gotBreak := ctx.info.isBreak()
		if wantBreak := hasBreakProp(r); gotBreak != wantBreak {
			t.Errorf("isBreak(%U): got %v; want %v (%x)", r, gotBreak, wantBreak, ctx.info)
		}
	}
	// TODO: get title case from unicode file.
}
// TestMapping checks the lower, title and upper mapping functions one rune at
// a time. Runes listed in special are compared against the expectations stored
// there; every other rune that has a distinct simple fold is compared against
// unicode.ToLower, unicode.ToUpper and unicode.ToTitle.
func TestMapping(t *testing.T) {
	pkgAssigned := rangetable.Assigned(UnicodeVersion)
	coreAssigned := rangetable.Assigned(unicode.Version)
	if coreAssigned == nil {
		coreAssigned = pkgAssigned
	}

	// mapRune applies fn to a context holding only r and returns what was
	// written to the destination buffer.
	mapRune := func(r rune, fn func(c *context) bool) string {
		ctx := contextFromRune(r)
		fn(ctx)
		return string(ctx.dst[:ctx.pDst])
	}

	for r, sp := range special {
		if got := mapRune(r, lower); got != sp.toLower {
			t.Errorf("lowerSpecial:(%U): got %+q; want %+q", r, got, sp.toLower)
		}
		if got := mapRune(r, title); got != sp.toTitle {
			t.Errorf("titleSpecial:(%U): got %+q; want %+q", r, got, sp.toTitle)
		}
		if got := mapRune(r, upper); got != sp.toUpper {
			t.Errorf("upperSpecial:(%U): got %+q; want %+q", r, got, sp.toUpper)
		}
	}

	for r := rune(0); r <= lastRuneForTesting; r++ {
		if !(unicode.In(r, pkgAssigned) && unicode.In(r, coreAssigned)) {
			continue
		}
		// Only runes with a distinct, assigned simple fold are checked.
		folded := unicode.SimpleFold(r)
		if folded == r || !unicode.In(folded, pkgAssigned) {
			continue
		}
		// Special runes were already handled by the loop above.
		if _, isSpecial := special[r]; isSpecial {
			continue
		}

		if got, want := mapRune(r, lower), string(unicode.ToLower(r)); got != want {
			t.Errorf("lower:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
		}
		if got, want := mapRune(r, upper), string(unicode.ToUpper(r)); got != want {
			t.Errorf("upper:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
		}
		if got, want := mapRune(r, title), string(unicode.ToTitle(r)); got != want {
			t.Errorf("title:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
		}
	}
}
// runeFoldData returns the fold data for r from foldMap with empty fields
// filled in: simple and full default to the lowercase form of r, and special
// defaults to the (possibly defaulted) full form.
func runeFoldData(r rune) (x struct{ simple, full, special string }) {
	x = foldMap[r]
	fallback := string(unicode.ToLower(r))
	if len(x.simple) == 0 {
		x.simple = fallback
	}
	if len(x.full) == 0 {
		x.full = fallback
	}
	if len(x.special) == 0 {
		x.special = x.full
	}
	return x
}
// TestFoldData checks that foldFull, applied to each assigned rune up to
// lastRuneForTesting, produces the full fold reported by runeFoldData.
func TestFoldData(t *testing.T) {
	pkgAssigned := rangetable.Assigned(UnicodeVersion)
	coreAssigned := rangetable.Assigned(unicode.Version)
	if coreAssigned == nil {
		coreAssigned = pkgAssigned
	}

	// foldRune applies fn to a context holding only r and returns the bytes
	// written together with the cccType of the context's info, which is used
	// only for diagnostics.
	foldRune := func(r rune, fn func(c *context) bool) (string, info) {
		ctx := contextFromRune(r)
		fn(ctx)
		return string(ctx.dst[:ctx.pDst]), ctx.info.cccType()
	}

	for r := rune(0); r <= lastRuneForTesting; r++ {
		if !(unicode.In(r, pkgAssigned) && unicode.In(r, coreAssigned)) {
			continue
		}
		want := runeFoldData(r).full
		got, ccc := foldRune(r, foldFull)
		if got != want {
			t.Errorf("full:%q (%U): got %q %U; want %q %U (ccc=%x)", r, r, got, []rune(got), want, []rune(want), ccc)
		}
		// TODO: special and simple.
	}
}
// TestCCC checks that the cccType stored for each rune agrees with the
// canonical combining class reported by the norm package: class 0 maps to
// cccZero, class `above` maps to cccAbove, and anything else to cccOther.
func TestCCC(t *testing.T) {
	pkgAssigned := rangetable.Assigned(UnicodeVersion)
	normAssigned := rangetable.Assigned(norm.Version)
	for r := rune(0); r <= lastRuneForTesting; r++ {
		if !(unicode.In(r, pkgAssigned) && unicode.In(r, normAssigned)) {
			continue
		}
		ctx := contextFromRune(r)

		want := cccOther
		if ccc := norm.NFC.PropertiesString(string(r)).CCC(); ccc == 0 {
			want = cccZero
		} else if ccc == above {
			want = cccAbove
		}

		if got := ctx.info.cccType(); got != want {
			t.Errorf("%U: got %x; want %x", r, got, want)
		}
	}
}
// TestWordBreaks runs Title(language.Und) over each breakTest entry with its
// "|" markers removed and compares the result with a reference title casing
// computed per "|"-delimited segment.
func TestWordBreaks(t *testing.T) {
	for _, tc := range breakTest {
		testtext.Run(t, tc, func(t *testing.T) {
			var want string
			for _, segment := range strings.Split(tc, "|") {
				// This algorithm implements title casing given word breaks
				// as defined in the Unicode standard 3.13 R3.
				cased := false
				for _, r := range segment {
					tr, lr := unicode.ToTitle(r), unicode.ToLower(r)
					if cased || tr == lr {
						want += string(lr)
						continue
					}
					// First rune of the segment whose title and lower
					// forms differ: it gets title cased.
					cased = true
					want += string(tr)
				}
			}
			input := strings.Replace(tc, "|", "", -1)
			if got := Title(language.Und).String(input); got != want {
				t.Errorf("got %q; want %q", got, want)
			}
		})
	}
}
// TestContext drives a context through a scripted sequence of operations and
// checks the result of hasPrefix and of ret (destination bytes, number of
// source bytes consumed and error).
//
// Each table entry lists the operations in ops as a ";"-separated string of
// "next", "copy", "checkpoint" and "writeab" (writeString("ab")). An empty ops
// string performs no operation. Every desc is unique so that a failure message
// identifies exactly one table entry.
func TestContext(t *testing.T) {
	tests := []struct {
		desc string

		dstSize int
		atEOF   bool
		src     string

		out  string
		nSrc int
		err  error

		ops string

		prefixArg  string
		prefixWant bool
	}{{
		desc:    "next: past end, atEOF, no checkpoint",
		dstSize: 10,
		atEOF:   true,
		src:     "12",
		out:     "",
		nSrc:    2,
		ops:     "next;next;next",
		// Test that calling prefix with a non-empty argument when the buffer
		// is depleted returns false.
		prefixArg:  "x",
		prefixWant: false,
	}, {
		desc:       "next: not at end, !atEOF, no checkpoint",
		dstSize:    10,
		atEOF:      false,
		src:        "12",
		out:        "",
		nSrc:       0,
		err:        transform.ErrShortSrc,
		ops:        "next;next",
		prefixArg:  "",
		prefixWant: true,
	}, {
		desc:       "next: past end, !atEOF, no checkpoint",
		dstSize:    10,
		atEOF:      false,
		src:        "12",
		out:        "",
		nSrc:       0,
		err:        transform.ErrShortSrc,
		ops:        "next;next;next",
		prefixArg:  "",
		prefixWant: true,
	}, {
		desc:       "next: past end, !atEOF, checkpoint",
		dstSize:    10,
		atEOF:      false,
		src:        "12",
		out:        "",
		nSrc:       2,
		ops:        "next;next;checkpoint;next",
		prefixArg:  "",
		prefixWant: true,
	}, {
		desc:       "copy: exact count, atEOF, no checkpoint",
		dstSize:    2,
		atEOF:      true,
		src:        "12",
		out:        "12",
		nSrc:       2,
		ops:        "next;copy;next;copy;next",
		prefixArg:  "",
		prefixWant: true,
	}, {
		desc:       "copy: past end, !atEOF, no checkpoint",
		dstSize:    2,
		atEOF:      false,
		src:        "12",
		out:        "",
		nSrc:       0,
		err:        transform.ErrShortSrc,
		ops:        "next;copy;next;copy;next",
		prefixArg:  "",
		prefixWant: true,
	}, {
		desc:       "copy: past end, !atEOF, checkpoint",
		dstSize:    2,
		atEOF:      false,
		src:        "12",
		out:        "12",
		nSrc:       2,
		ops:        "next;copy;next;copy;checkpoint;next",
		prefixArg:  "",
		prefixWant: true,
	}, {
		desc:       "copy: short dst",
		dstSize:    1,
		atEOF:      false,
		src:        "12",
		out:        "",
		nSrc:       0,
		err:        transform.ErrShortDst,
		ops:        "next;copy;next;copy;checkpoint;next",
		prefixArg:  "12",
		prefixWant: false,
	}, {
		desc:       "copy: short dst, checkpointed",
		dstSize:    1,
		atEOF:      false,
		src:        "12",
		out:        "1",
		nSrc:       1,
		err:        transform.ErrShortDst,
		ops:        "next;copy;checkpoint;next;copy;next",
		prefixArg:  "",
		prefixWant: true,
	}, {
		desc:       "writeString: simple 1",
		dstSize:    3,
		atEOF:      true,
		src:        "1",
		out:        "1ab",
		nSrc:       1,
		ops:        "next;copy;writeab;next",
		prefixArg:  "",
		prefixWant: true,
	}, {
		desc:       "writeString: short dst 1",
		dstSize:    2,
		atEOF:      true,
		src:        "12",
		out:        "",
		nSrc:       0,
		err:        transform.ErrShortDst,
		ops:        "next;copy;writeab;next",
		prefixArg:  "2",
		prefixWant: true,
	}, {
		desc:       "writeString: simple 2",
		dstSize:    3,
		atEOF:      true,
		src:        "12",
		out:        "1ab",
		nSrc:       2,
		ops:        "next;copy;next;writeab;next",
		prefixArg:  "",
		prefixWant: true,
	}, {
		desc:       "writeString: short dst 2",
		dstSize:    2,
		atEOF:      true,
		src:        "12",
		out:        "",
		nSrc:       0,
		err:        transform.ErrShortDst,
		ops:        "next;copy;next;writeab;next",
		prefixArg:  "1",
		prefixWant: false,
	}, {
		desc:    "prefix",
		dstSize: 2,
		atEOF:   true,
		src:     "12",
		out:     "",
		nSrc:    0,
		// Context will assign an ErrShortSrc if the input wasn't exhausted.
		err:        transform.ErrShortSrc,
		prefixArg:  "12",
		prefixWant: true,
	}}
	for _, tt := range tests {
		c := context{dst: make([]byte, tt.dstSize), src: []byte(tt.src), atEOF: tt.atEOF}

		for _, op := range strings.Split(tt.ops, ";") {
			switch op {
			case "next":
				c.next()
			case "checkpoint":
				c.checkpoint()
			case "writeab":
				c.writeString("ab")
			case "copy":
				c.copy()
			case "":
				// An empty ops string splits into one empty op: do nothing.
			default:
				t.Fatalf("unknown op %q", op)
			}
		}
		// hasPrefix is checked before ret so it sees the state left by the ops.
		if got := c.hasPrefix(tt.prefixArg); got != tt.prefixWant {
			t.Errorf("%s:\nprefix was %v; want %v", tt.desc, got, tt.prefixWant)
		}
		nDst, nSrc, err := c.ret()
		if err != tt.err {
			t.Errorf("%s:\nerror was %v; want %v", tt.desc, err, tt.err)
		}
		if out := string(c.dst[:nDst]); out != tt.out {
			t.Errorf("%s:\nout was %q; want %q", tt.desc, out, tt.out)
		}
		if nSrc != tt.nSrc {
			t.Errorf("%s:\nnSrc was %d; want %d", tt.desc, nSrc, tt.nSrc)
		}
	}
}

53
vendor/golang.org/x/text/cases/example_test.go generated vendored Normal file
View file

@ -0,0 +1,53 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases_test
import (
"fmt"
"golang.org/x/text/cases"
"golang.org/x/text/language"
)
// Example prints four sample strings under four different casers: root
// lower casing, Turkish upper casing, Dutch title casing, and root title
// casing with the NoLower option.
func Example() {
	inputs := []string{
		"hello world!",
		"i with dot",
		"'n ijsberg",
		"here comes O'Brian",
	}
	casers := []cases.Caser{
		cases.Lower(language.Und),
		cases.Upper(language.Turkish),
		cases.Title(language.Dutch),
		cases.Title(language.Und, cases.NoLower),
	}
	for i := range casers {
		// A blank line separates the output of consecutive casers.
		fmt.Println()
		for j := range inputs {
			fmt.Println(casers[i].String(inputs[j]))
		}
	}
	// Output:
	// hello world!
	// i with dot
	// 'n ijsberg
	// here comes o'brian
	//
	// HELLO WORLD!
	// İ WİTH DOT
	// 'N İJSBERG
	// HERE COMES O'BRİAN
	//
	// Hello World!
	// I With Dot
	// 'n IJsberg
	// Here Comes O'brian
	//
	// Hello World!
	// I With Dot
	// 'N Ijsberg
	// Here Comes O'Brian
}

51
vendor/golang.org/x/text/cases/fold_test.go generated vendored Normal file
View file

@ -0,0 +1,51 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
import (
"testing"
"golang.org/x/text/internal/testtext"
)
// foldTestCases holds the input strings for TestFold; TestICUConformance also
// adds them to its input set. The trailing comment on each entry appears to
// show the expected folded text (NOTE(review): confirm, some look truncated).
var foldTestCases = []string{
"βß\u13f8", // "βssᏰ"
"ab\u13fc\uab7aꭰ", // ab
"affifflast", // affifflast
"Iİiı\u0345", // ii̇iıι
"µµΜΜςσΣΣ", // μμμμσσσσ
}
// TestFold checks, for every entry of foldTestCases, that Fold() produces the
// concatenation of the per-rune full folds from runeFoldData and that
// Transform performs no allocations.
func TestFold(t *testing.T) {
	for _, input := range foldTestCases {
		// check compares c.String(input) with the rune-by-rune mapping given
		// by mapRune and then measures allocations of c.Transform.
		check := func(name string, c Caser, mapRune func(r rune) string) {
			var want string
			for _, r := range input {
				want += mapRune(r)
			}
			got := c.String(input)
			if got != want {
				t.Errorf("%s(%s) = %+q; want %+q", name, input, got, want)
			}

			out := make([]byte, 256) // big enough to hold any result
			in := []byte(input)
			allocs := testtext.AllocsPerRun(20, func() {
				c.Transform(out, in, true)
			})
			if allocs > 0 {
				t.Errorf("%s(%s): number of allocs was %f; want 0", name, input, allocs)
			}
		}

		check("FullFold", Fold(), func(r rune) string {
			return runeFoldData(r).full
		})
		// TODO:
		// testEntry("SimpleFold", Fold(Compact), func(r rune) string {
		// 	return runeFoldData(r).simple
		// })
		// testEntry("SpecialFold", Fold(Turkic), func(r rune) string {
		// 	return runeFoldData(r).special
		// })
	}
}

210
vendor/golang.org/x/text/cases/icu_test.go generated vendored Normal file
View file

@ -0,0 +1,210 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build icu
package cases
import (
"path"
"strings"
"testing"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/language"
"golang.org/x/text/unicode/norm"
)
// TestICUConformance compares the output of this package (doGo) with that of
// ICU (doICU) for the lower, upper, title and fold mappings across a fixed
// list of language tags. Results are compared after NFC normalization.
//
// The input set is a hand-written list extended with the strings from
// testCases, bufferTests, breakTest (with "|" removed) and foldTestCases.
// Combinations for which exclude returns true are skipped.
func TestICUConformance(t *testing.T) {
	// Build test set.
	input := []string{
		"a.a a_a",
		"a\u05d0a",
		"\u05d0'a",
		"a\u03084a",
		"a\u0308a",
		"a3\u30a3a",
		"a\u303aa",
		"a_\u303a_a",
		"1_a..a",
		"1_a.a",
		"a..a.",
		"a--a-",
		"a-a-",
		"a\u200ba",
		"a\u200b\u200ba",
		"a\u00ad\u00ada", // Format
		"a\u00ada",
		"a''a", // SingleQuote
		"a'a",
		"a::a", // MidLetter
		"a:a",
		"a..a", // MidNumLet
		"a.a",
		"a;;a", // MidNum
		"a;a",
		"a__a", // ExtendNumlet
		"a_a",
		"ΟΣ''a",
	}
	// add appends a string or every element of a []string to input. Values of
	// any other dynamic type, including nil, are ignored.
	add := func(x interface{}) {
		switch v := x.(type) {
		case string:
			input = append(input, v)
		case []string:
			input = append(input, v...)
		}
	}
	for _, tc := range testCases {
		add(tc.src)
		add(tc.lower)
		add(tc.upper)
		add(tc.title)
	}
	for _, tc := range bufferTests {
		add(tc.src)
	}
	for _, tc := range breakTest {
		add(strings.Replace(tc, "|", "", -1))
	}
	for _, tc := range foldTestCases {
		add(tc)
	}

	// Compare ICU to Go.
	for _, c := range []string{"lower", "upper", "title", "fold"} {
		for _, tag := range []string{
			"und", "af", "az", "el", "lt", "nl", "tr",
		} {
			for _, s := range input {
				if exclude(c, tag, s) {
					continue
				}
				testtext.Run(t, path.Join(c, tag, s), func(t *testing.T) {
					want := doICU(tag, c, s)
					got := doGo(tag, c, s)
					if norm.NFC.String(got) != norm.NFC.String(want) {
						t.Errorf("\n    in %[3]q (%+[3]q)\n   got %[1]q (%+[1]q)\n  want %[2]q (%+[2]q)", got, want, s)
					}
				})
			}
		}
	}
}
// exclude indicates if a string should be excluded from testing.
//
// cm is the case mapping name ("lower", "upper", "title" or "fold"), tag the
// language tag and s the input string. An entry of the exclusion list applies
// when cm occurs in the entry's cm field, tag occurs in the entry's tags field
// (an empty field matches anything), and s contains the entry's pattern. All
// three checks are plain substring tests.
func exclude(cm, tag, s string) bool {
	list := []struct{ cm, tags, pattern string }{
		// TODO: Go does not handle certain esoteric breaks correctly. This will be
		// fixed once we have a real word break iterator. Alternatively, it
		// seems like we're not too far off from making it work, so we could
		// fix these last steps. But first verify that using a separate word
		// breaker does not hurt performance.
		{"title", "af nl", "a''a"},
		{"", "", "א'a"},

		// All the exclusions below seem to be issues with the ICU
		// implementation (at version 57) and thus are not marked as TODO.

		// ICU does not handle leading apostrophe for Dutch and
		// Afrikaans correctly. See http://unicode.org/cldr/trac/ticket/7078.
		{"title", "af nl", "'n"},
		{"title", "af nl", "'N"},

		// Go terminates the final sigma check after a fixed number of
		// ignorables have been found. This ensures that the algorithm can make
		// progress in a streaming scenario.
		{"lower title", "", "\u039f\u03a3...............................a"},
		// This also applies to upper in Greek.
		// NOTE: we could fix the following two cases by adding state to elUpper
		// and aztrLower. However, considering a modifier to not belong to the
		// preceding letter after the maximum modifiers count is reached is
		// consistent with the behavior of unicode/norm.
		{"upper", "el", "\u03bf" + strings.Repeat("\u0321", 29) + "\u0313"},
		{"lower", "az tr lt", "I" + strings.Repeat("\u0321", 30) + "\u0307\u0300"},
		{"upper", "lt", "i" + strings.Repeat("\u0321", 30) + "\u0307\u0300"},
		{"lower", "lt", "I" + strings.Repeat("\u0321", 30) + "\u0300"},

		// ICU title case seems to erroneously remove \u0307 from an upper case
		// I unconditionally, instead of only when lowercasing. The ICU
		// transform algorithm transforms these cases consistently with our
		// implementation.
		{"title", "az tr", "\u0307"},

		// The spec says to remove \u0307 after Soft-Dotted characters. ICU
		// transforms conform but ucasemap_utf8ToUpper does not.
		{"upper title", "lt", "i\u0307"},
		{"upper title", "lt", "i" + strings.Repeat("\u0321", 29) + "\u0307\u0300"},

		// Both Unicode and CLDR prescribe an extra explicit dot above after a
		// Soft_Dotted character if there are other modifiers.
		// ucasemap_utf8ToUpper does not do this; ICU transforms do.
		// The issue with ucasemap_utf8ToUpper seems to be that it does not
		// consider the modifiers that are part of composition in the evaluation
		// of More_Above. For instance, according to the More_Above rule for lt,
		// a dotted capital I (U+0130) becomes i\u0307\u0307 (an small i with
		// two additional dots). This seems odd, but is correct. ICU is
		// definitely not correct as it produces different results for different
		// normal forms. For instance, for an İ:
		//    \u0130  (NFC) -> i\u0307         (incorrect)
		//    I\u0307 (NFD) -> i\u0307\u0307   (correct)
		// We could argue that we should not add a \u0307 if there already is
		// one, but this may be hard to get correct and is not conform the
		// standard.
		{"lower title", "lt", "\u0130"},
		{"lower title", "lt", "\u00cf"},

		// We are conform ICU ucasemap_utf8ToUpper if we remove support for
		// elUpper. However, this is clearly not conform the spec. Moreover, the
		// ICU transforms _do_ implement this transform and produces results
		// consistent with our implementation. Note that we still prefer to use
		// ucasemap_utf8ToUpper instead of transforms as the latter have
		// inconsistencies in the word breaking algorithm.
		{"upper", "el", "\u0386"}, // GREEK CAPITAL LETTER ALPHA WITH TONOS
		{"upper", "el", "\u0389"}, // GREEK CAPITAL LETTER ETA WITH TONOS
		{"upper", "el", "\u038A"}, // GREEK CAPITAL LETTER IOTA WITH TONOS

		{"upper", "el", "\u0391"}, // GREEK CAPITAL LETTER ALPHA
		{"upper", "el", "\u0397"}, // GREEK CAPITAL LETTER ETA
		{"upper", "el", "\u0399"}, // GREEK CAPITAL LETTER IOTA

		{"upper", "el", "\u03AC"}, // GREEK SMALL LETTER ALPHA WITH TONOS
		{"upper", "el", "\u03AE"}, // GREEK SMALL LETTER ETA WITH TONOS
		{"upper", "el", "\u03AF"}, // GREEK SMALL LETTER IOTA WITH TONOS

		{"upper", "el", "\u03B1"}, // GREEK SMALL LETTER ALPHA
		{"upper", "el", "\u03B7"}, // GREEK SMALL LETTER ETA
		{"upper", "el", "\u03B9"}, // GREEK SMALL LETTER IOTA
	}
	for _, x := range list {
		// An empty cm or tags field is a wildcard.
		if x.cm != "" && !strings.Contains(x.cm, cm) {
			continue
		}
		if x.tags != "" && !strings.Contains(x.tags, tag) {
			continue
		}
		if strings.Contains(s, x.pattern) {
			return true
		}
	}
	return false
}
func doGo(tag, caser, input string) string {
var c Caser
t := language.MustParse(tag)
switch caser {
case "lower":
c = Lower(t)
case "upper":
c = Upper(t)
case "title":
c = Title(t)
case "fold":
c = Fold()
}
return c.String(input)
}

950
vendor/golang.org/x/text/cases/map_test.go generated vendored Normal file
View file

@ -0,0 +1,950 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cases
import (
"bytes"
"fmt"
"path"
"strings"
"testing"
"unicode/utf8"
"golang.org/x/text/internal/testtext"
"golang.org/x/text/language"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// testCase is one row of the testCases table. TestCaseMappings runs each row
// through the upper, lower and title casers, and TestICUConformance adds its
// strings to the ICU comparison input set.
type testCase struct {
// lang holds one or more space-separated language tags; TestCaseMappings
// repeats the row for each tag.
lang string
// src, title, lower and upper are either a single string, which
// TestCaseMappings splits on spaces, or a []string whose elements are used
// as-is. Expected values are matched to src by position.
src interface{} // string, []string, or nil to skip test
title interface{} // string, []string, or nil to skip test
lower interface{} // string, []string, or nil to skip test
upper interface{} // string, []string, or nil to skip test
// opts is passed to the caser constructor along with the language tag.
opts options
}
// testCases is the table of case mapping expectations used by
// TestCaseMappings; TestICUConformance also feeds its strings to ICU. Entries
// carry explicit indices, which TestCaseMappings prints in failure messages.
var testCases = []testCase{
0: {
lang: "und",
src: "abc aBc ABC abC İsıI ΕΣΆΣ",
title: "Abc Abc Abc Abc İsıi Εσάσ",
lower: "abc abc abc abc i\u0307sıi εσάσ",
upper: "ABC ABC ABC ABC İSII ΕΣΆΣ",
opts: getOpts(HandleFinalSigma(false)),
},
1: {
lang: "und",
src: "abc aBc ABC abC İsıI ΕΣΆΣ Σ _Σ -Σ",
title: "Abc Abc Abc Abc İsıi Εσάς Σ _Σ -Σ",
lower: "abc abc abc abc i\u0307sıi εσάς σ _σ -σ",
upper: "ABC ABC ABC ABC İSII ΕΣΆΣ Σ _Σ -Σ",
opts: getOpts(HandleFinalSigma(true)),
},
2: { // Title cased runes.
lang: supported,
src: "DžA",
title: "Dža",
lower: "dža",
upper: "DŽA",
},
3: {
// Title breaking.
lang: supported,
src: []string{
"FOO CASE TEST",
"DON'T DO THiS",
"χωΡΊΣ χωΡΊΣ^a χωΡΊΣ:a χωΡΊΣ:^a χωΡΊΣ^ όμΩΣ Σ",
"with-hyphens",
"49ers 49ers",
`"capitalize a^a -hyphen 0X _u a_u:a`,
"MidNumLet a.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg",
"MidNum a,b;c\u037ed\u0589e\u060cf\u2044g\ufe50h",
"\u0345 x\u3031x x\u05d0x \u05d0x a'.a a.a a4,a",
},
title: []string{
"Foo Case Test",
"Don't Do This",
"Χωρίς Χωρίσ^A Χωρίσ:a Χωρίσ:^A Χωρίς^ Όμως Σ",
"With-Hyphens",
// Note that 49Ers is correct according to the spec.
// TODO: provide some option to the user to treat different
// characters as cased.
"49Ers 49Ers",
`"Capitalize A^A -Hyphen 0X _U A_u:a`,
"Midnumlet A.b\u2018c\u2019d\u2024e\ufe52f\uff07f\uff0eg",
"Midnum A,B;C\u037eD\u0589E\u060cF\u2044G\ufe50H",
"\u0399 X\u3031X X\u05d0x \u05d0X A'.A A.a A4,A",
},
},
// TODO: These are known deviations from the options{} Unicode Word Breaking
// Algorithm.
// {
// "und",
// "x_\u3031_x a4,4a",
// "X_\u3031_x A4,4a", // Currently is "X_\U3031_X A4,4A".
// "x_\u3031_x a4,4a",
// "X_\u3031_X A4,4A",
// options{},
// },
4: {
// Tests title options
lang: "und",
src: "abc aBc ABC abC İsıI o'Brien",
title: "Abc ABc ABC AbC İsıI O'Brien",
opts: getOpts(NoLower),
},
5: {
lang: "el",
src: "aBc ΟΔΌΣ Οδός Σο ΣΟ Σ oΣ ΟΣ σ ἕξ \u03ac",
title: "Abc Οδός Οδός Σο Σο Σ Oς Ος Σ Ἕξ \u0386",
lower: "abc οδός οδός σο σο σ oς ος σ ἕξ \u03ac",
upper: "ABC ΟΔΟΣ ΟΔΟΣ ΣΟ ΣΟ Σ OΣ ΟΣ Σ ΕΞ \u0391", // Uppercase removes accents
},
6: {
lang: "tr az",
src: "Isiİ İsıI I\u0307sIiİ İsıI\u0307 I\u0300\u0307",
title: "Isii İsıı I\u0307sıii İsıi I\u0300\u0307",
lower: "ısii isıı isıii isıi \u0131\u0300\u0307",
upper: "ISİİ İSII I\u0307SIİİ İSII\u0307 I\u0300\u0307",
},
7: {
lang: "lt",
src: "I Ï J J̈ Į Į̈ Ì Í Ĩ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤",
title: "I Ï J J̈ Į Į̈ Ì Í Ĩ Xi̇̈ Xj̇̈ Xį̇̈ Xi̇̀ Xi̇́ Xi̇̃ Xi Xi̇̈ Xj Xj̇̈ Xį Xį̇̈ Xi̟̤",
lower: "i i̇̈ j j̇̈ į į̇̈ i̇̀ i̇́ i̇̃ xi̇̈ xj̇̈ xį̇̈ xi̇̀ xi̇́ xi̇̃ xi xi̇̈ xj xj̇̈ xį xį̇̈ xi̟̤",
upper: "I Ï J J̈ Į Į̈ Ì Í Ĩ XÏ XJ̈ XĮ̈ XÌ XÍ XĨ XI XÏ XJ XJ̈ XĮ XĮ̈ XI̟̤",
},
8: {
lang: "lt",
src: "\u012e\u0300 \u00cc i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307",
title: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307",
lower: "\u012f\u0307\u0300 i\u0307\u0300 i\u0307\u0300 i\u0307\u0301 i\u0307\u0303 i\u0307\u0308 i\u0300\u0307",
upper: "\u012e\u0300 \u00cc \u00cc \u00cd \u0128 \u00cf I\u0300\u0307",
},
9: {
lang: "nl",
src: "ijs IJs Ij Ijs İJ İJs aa aA 'ns 'S",
title: "IJs IJs IJ IJs İj İjs Aa Aa 'ns 's",
},
// Note: this specification is not currently part of CLDR. The same holds
// for the leading apostrophe handling for Dutch.
// See http://unicode.org/cldr/trac/ticket/7078.
10: {
lang: "af",
src: "wag 'n bietjie",
title: "Wag 'n Bietjie",
lower: "wag 'n bietjie",
upper: "WAG 'N BIETJIE",
},
}
// TestCaseMappings runs every entry of testCases, for each of its
// space-separated language tags, through the upper, lower and title casers.
//
// For each caser it checks that String maps every source string to the
// corresponding expected ("gold") string, unless the gold value is nil, and
// that Transform over the space-joined source performs at most one allocation
// per run.
func TestCaseMappings(t *testing.T) {
	for i, tt := range testCases {
		src, ok := tt.src.([]string)
		if !ok {
			src = strings.Split(tt.src.(string), " ")
		}
		// Input for the allocation check. It has its own name so that it does
		// not shadow the src slice used for the per-string comparisons.
		joined := []byte(strings.Join(src, " "))

		for _, lang := range strings.Split(tt.lang, " ") {
			tag := language.MustParse(lang)
			testEntry := func(name string, mk func(language.Tag, options) transform.SpanningTransformer, gold interface{}) {
				c := Caser{mk(tag, tt.opts)}
				if gold != nil {
					wants, ok := gold.([]string)
					if !ok {
						wants = strings.Split(gold.(string), " ")
					}
					for j, want := range wants {
						// Report a malformed table entry instead of
						// panicking with an index out of range.
						if j >= len(src) {
							t.Errorf("%d:%s:\n%s: %d expected strings but only %d source strings", i, lang, name, len(wants), len(src))
							break
						}
						if got := c.String(src[j]); got != want {
							t.Errorf("%d:%s:\n%s.String(%+q):\ngot  %+q;\nwant %+q", i, lang, name, src[j], got, want)
						}
					}
				}
				dst := make([]byte, 256) // big enough to hold any result
				v := testtext.AllocsPerRun(20, func() {
					c.Transform(dst, joined, true)
				})
				// One allocation per run is tolerated, hence the 1.1 bound.
				if v > 1.1 {
					t.Errorf("%d:%s:\n%s: number of allocs was %f; want at most 1", i, lang, name, v)
				}
			}
			testEntry("Upper", makeUpper, tt.upper)
			testEntry("Lower", makeLower, tt.lower)
			testEntry("Title", makeTitle, tt.title)
		}
	}
}
// TestAlloc verifies that constructing certain casers, and transforming
// txtNonASCII with them, does not allocate.
func TestAlloc(t *testing.T) {
	out := make([]byte, 256) // big enough to hold any result
	in := []byte(txtNonASCII)

	makers := []func() Caser{
		func() Caser { return Upper(language.Und) },
		func() Caser { return Lower(language.Und) },
		func() Caser { return Lower(language.Und, HandleFinalSigma(false)) },
		// TODO: use a shared copy for these casers as well, in order of
		// importance, starting with the most important:
		// func() Caser { return Title(language.Und) },
		// func() Caser { return Title(language.Und, HandleFinalSigma(false)) },
	}
	for i, mk := range makers {
		testtext.Run(t, "", func(t *testing.T) {
			var c Caser
			if allocs := testtext.AllocsPerRun(10, func() { c = mk() }); allocs > 0 {
				// TODO: Right now only Upper has 1 allocation. Special-case Lower
				// and Title as well to have less allocations for the root locale.
				t.Errorf("%d:init: number of allocs was %f; want 0", i, allocs)
			}
			if allocs := testtext.AllocsPerRun(2, func() { c.Transform(out, in, true) }); allocs > 0 {
				t.Errorf("%d:transform: number of allocs was %f; want 0", i, allocs)
			}
		})
	}
}
// testHandover checks that calling Span on a leading part of src and then
// Transform on the remainder yields the same result as c.String(src). It tries
// every split point inside the byte prefix that src shares with its mapped
// form.
func testHandover(t *testing.T, c Caser, src string) {
	want := c.String(src)

	// Length of the longest common byte prefix of src and want.
	prefixLen := 0
	for prefixLen < len(src) && prefixLen < len(want) && want[prefixLen] == src[prefixLen] {
		prefixLen++
	}

	// Test handover for each substring of the prefix.
	for split := 0; split < prefixLen; split++ {
		testtext.Run(t, fmt.Sprint("interleave/", split), func(t *testing.T) {
			out := make([]byte, 4*len(src))
			c.Reset()
			spanned, _ := c.Span([]byte(src[:split]), false)
			// The spanned bytes are taken over unchanged; Transform handles
			// everything after them.
			copy(out, src[:spanned])
			written, _, _ := c.Transform(out[spanned:], []byte(src[spanned:]), true)
			if got := string(out[:spanned+written]); got != want {
				t.Errorf("full string: got %q; want %q", got, want)
			}
		})
	}
}
// TestHandover checks the hand-over from Span to Transform for inputs split
// at a fixed position: Span is called on first, the bytes it accepts are
// copied verbatim, and Transform processes the rest of first+second. The
// combined output must equal String(first+second).
func TestHandover(t *testing.T) {
	handoverTests := []struct {
		desc          string
		caser         Caser
		first, second string
	}{{
		desc:   "title/nosigma/single midword",
		caser:  Title(language.Und, HandleFinalSigma(false)),
		first:  "A.",
		second: "a",
	}, {
		desc:   "title/nosigma/single midword",
		caser:  Title(language.Und, HandleFinalSigma(false)),
		first:  "A",
		second: ".a",
	}, {
		desc:   "title/nosigma/double midword",
		caser:  Title(language.Und, HandleFinalSigma(false)),
		first:  "A..",
		second: "a",
	}, {
		desc:   "title/nosigma/double midword",
		caser:  Title(language.Und, HandleFinalSigma(false)),
		first:  "A.",
		second: ".a",
	}, {
		desc:   "title/nosigma/double midword",
		caser:  Title(language.Und, HandleFinalSigma(false)),
		first:  "A",
		second: "..a",
	}, {
		desc:   "title/sigma/single midword",
		caser:  Title(language.Und),
		first:  "ΟΣ.",
		second: "a",
	}, {
		desc:   "title/sigma/single midword",
		caser:  Title(language.Und),
		first:  "ΟΣ",
		second: ".a",
	}, {
		desc:   "title/sigma/double midword",
		caser:  Title(language.Und),
		first:  "ΟΣ..",
		second: "a",
	}, {
		desc:   "title/sigma/double midword",
		caser:  Title(language.Und),
		first:  "ΟΣ.",
		second: ".a",
	}, {
		desc:   "title/sigma/double midword",
		caser:  Title(language.Und),
		first:  "ΟΣ",
		second: "..a",
	}, {
		desc:   "title/af/leading apostrophe",
		caser:  Title(language.Afrikaans),
		first:  "'",
		second: "n bietje",
	}}
	for _, tc := range handoverTests {
		testtext.Run(t, tc.desc, func(t *testing.T) {
			whole := tc.first + tc.second
			want := tc.caser.String(whole)
			tc.caser.Reset()
			spanned, _ := tc.caser.Span([]byte(tc.first), false)

			out := make([]byte, len(want))
			copy(out, tc.first[:spanned])
			written, _, _ := tc.caser.Transform(out[spanned:], []byte(whole[spanned:]), true)
			if got := string(out[:spanned+written]); got != want {
				t.Errorf("got %q; want %q", got, want)
			}
		})
	}
}
// minBufSize is the buffer size at which the casing operations in this
// package are guaranteed to make progress. It equals norm.MaxSegmentSize.
const minBufSize = norm.MaxSegmentSize
// bufferTest is one entry of the bufferTests table: a transformer t, an input
// src, the expected output want, and the destination and source buffer sizes
// to exercise it with.
// NOTE(review): firstErr presumably is the error expected from the first
// Transform call made with those sizes; confirm against the test that
// consumes bufferTests, which is not visible here.
type bufferTest struct {
desc, src, want string
firstErr error
dstSize, srcSize int
t transform.SpanningTransformer
}
// bufferTests is the table of buffer-size test cases. It is populated in init
// rather than by an initializer; TestICUConformance also adds each entry's
// src to its input set.
var bufferTests []bufferTest
func init() {
bufferTests = []bufferTest{{
desc: "und/upper/short dst",
src: "abcdefg",
want: "ABCDEFG",
firstErr: transform.ErrShortDst,
dstSize: 3,
srcSize: minBufSize,
t: Upper(language.Und),
}, {
desc: "und/upper/short src",
src: "123é56",
want: "123É56",
firstErr: transform.ErrShortSrc,
dstSize: 4,
srcSize: 4,
t: Upper(language.Und),
}, {
desc: "und/upper/no error on short",
src: "12",
want: "12",
firstErr: nil,
dstSize: 1,
srcSize: 1,
t: Upper(language.Und),
}, {
desc: "und/lower/short dst",
src: "ABCDEFG",
want: "abcdefg",
firstErr: transform.ErrShortDst,
dstSize: 3,
srcSize: minBufSize,
t: Lower(language.Und),
}, {
desc: "und/lower/short src",
src: "123É56",
want: "123é56",
firstErr: transform.ErrShortSrc,
dstSize: 4,
srcSize: 4,
t: Lower(language.Und),
}, {
desc: "und/lower/no error on short",
src: "12",
want: "12",
firstErr: nil,
dstSize: 1,
srcSize: 1,
t: Lower(language.Und),
}, {
desc: "und/lower/simple (no final sigma)",
src: "ΟΣ ΟΣΣ",
want: "οσ οσσ",
dstSize: minBufSize,
srcSize: minBufSize,
t: Lower(language.Und, HandleFinalSigma(false)),
}, {
desc: "und/title/simple (no final sigma)",
src: "ΟΣ ΟΣΣ",
want: "Οσ Οσσ",
dstSize: minBufSize,
srcSize: minBufSize,
t: Title(language.Und, HandleFinalSigma(false)),
}, {
desc: "und/title/final sigma: no error",
src: "ΟΣ",
want: "Ος",
dstSize: minBufSize,
srcSize: minBufSize,
t: Title(language.Und),
}, {
desc: "und/title/final sigma: short source",
src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
firstErr: transform.ErrShortSrc,
dstSize: minBufSize,
srcSize: 10,
t: Title(language.Und),
}, {
desc: "und/title/final sigma: short destination 1",
src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
firstErr: transform.ErrShortDst,
dstSize: 10,
srcSize: minBufSize,
t: Title(language.Und),
}, {
desc: "und/title/final sigma: short destination 2",
src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
firstErr: transform.ErrShortDst,
dstSize: 9,
srcSize: minBufSize,
t: Title(language.Und),
}, {
desc: "und/title/final sigma: short destination 3",
src: "ΟΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣΣ",
want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσς",
firstErr: transform.ErrShortDst,
dstSize: 8,
srcSize: minBufSize,
t: Title(language.Und),
}, {
desc: "und/title/clipped UTF-8 rune",
src: "σσσσσσσσσσσ",
want: "Σσσσσσσσσσσ",
firstErr: transform.ErrShortSrc,
dstSize: minBufSize,
srcSize: 5,
t: Title(language.Und),
}, {
desc: "und/title/clipped UTF-8 rune atEOF",
src: "σσσ" + string([]byte{0xCF}),
want: "Σσσ" + string([]byte{0xCF}),
dstSize: minBufSize,
srcSize: minBufSize,
t: Title(language.Und),
}, {
// Note: the choice to change the final sigma at the end in case of
// too many case ignorables is arbitrary. The main reason for this
// choice is that it results in simpler code.
desc: "und/title/final sigma: max ignorables",
src: "ΟΣ" + strings.Repeat(".", maxIgnorable) + "a",
want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
dstSize: minBufSize,
srcSize: minBufSize,
t: Title(language.Und),
}, {
// Note: the choice to change the final sigma at the end in case of
// too many case ignorables is arbitrary. The main reason for this
// choice is that it results in simpler code.
desc: "und/title/long string",
src: "AA" + strings.Repeat(".", maxIgnorable+1) + "a",
want: "Aa" + strings.Repeat(".", maxIgnorable+1) + "A",
dstSize: minBufSize,
srcSize: len("AA" + strings.Repeat(".", maxIgnorable+1)),
t: Title(language.Und),
}, {
// Note: the choice to change the final sigma at the end in case of
// too many case ignorables is arbitrary. The main reason for this
// choice is that it results in simpler code.
desc: "und/title/final sigma: too many ignorables",
src: "ΟΣ" + strings.Repeat(".", maxIgnorable+1) + "a",
want: "Ος" + strings.Repeat(".", maxIgnorable+1) + "A",
dstSize: minBufSize,
srcSize: len("ΟΣ" + strings.Repeat(".", maxIgnorable+1)),
t: Title(language.Und),
}, {
desc: "und/title/final sigma: apostrophe",
src: "ΟΣ''a",
want: "Οσ''A",
dstSize: minBufSize,
srcSize: minBufSize,
t: Title(language.Und),
}, {
desc: "el/upper/max ignorables",
src: "ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313",
want: "Ο" + strings.Repeat("\u0321", maxIgnorable-1),
dstSize: minBufSize,
srcSize: minBufSize,
t: Upper(language.Greek),
}, {
desc: "el/upper/too many ignorables",
src: "ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
want: "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
dstSize: minBufSize,
srcSize: len("ο" + strings.Repeat("\u0321", maxIgnorable)),
t: Upper(language.Greek),
}, {
desc: "el/upper/short dst",
src: "123ο",
want: "123Ο",
firstErr: transform.ErrShortDst,
dstSize: 3,
srcSize: minBufSize,
t: Upper(language.Greek),
}, {
desc: "lt/lower/max ignorables",
src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
dstSize: minBufSize,
srcSize: minBufSize,
t: Lower(language.Lithuanian),
}, {
desc: "lt/lower/too many ignorables",
src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
want: "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
dstSize: minBufSize,
srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)),
t: Lower(language.Lithuanian),
}, {
desc: "lt/lower/decomposition with short dst buffer 1",
src: "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE
firstErr: transform.ErrShortDst,
want: "aaaaai\u0307\u0300",
dstSize: 5,
srcSize: minBufSize,
t: Lower(language.Lithuanian),
}, {
desc: "lt/lower/decomposition with short dst buffer 2",
src: "aaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE
firstErr: transform.ErrShortDst,
want: "aaaai\u0307\u0300",
dstSize: 5,
srcSize: minBufSize,
t: Lower(language.Lithuanian),
}, {
desc: "lt/upper/max ignorables",
src: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
want: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
dstSize: minBufSize,
srcSize: minBufSize,
t: Upper(language.Lithuanian),
}, {
desc: "lt/upper/too many ignorables",
src: "i" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
want: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
dstSize: minBufSize,
srcSize: len("i" + strings.Repeat("\u0321", maxIgnorable)),
t: Upper(language.Lithuanian),
}, {
desc: "lt/upper/short dst",
src: "12i\u0307\u0300",
want: "12\u00cc",
firstErr: transform.ErrShortDst,
dstSize: 3,
srcSize: minBufSize,
t: Upper(language.Lithuanian),
}, {
desc: "aztr/lower/max ignorables",
src: "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
dstSize: minBufSize,
srcSize: minBufSize,
t: Lower(language.Turkish),
}, {
desc: "aztr/lower/too many ignorables",
src: "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
want: "\u0131" + strings.Repeat("\u0321", maxIgnorable) + "\u0307\u0300",
dstSize: minBufSize,
srcSize: len("I" + strings.Repeat("\u0321", maxIgnorable)),
t: Lower(language.Turkish),
}, {
desc: "nl/title/pre-IJ cutoff",
src: " ij",
want: " IJ",
firstErr: transform.ErrShortDst,
dstSize: 2,
srcSize: minBufSize,
t: Title(language.Dutch),
}, {
desc: "nl/title/mid-IJ cutoff",
src: " ij",
want: " IJ",
firstErr: transform.ErrShortDst,
dstSize: 3,
srcSize: minBufSize,
t: Title(language.Dutch),
}, {
desc: "af/title/apostrophe",
src: "'n bietje",
want: "'n Bietje",
firstErr: transform.ErrShortDst,
dstSize: 3,
srcSize: minBufSize,
t: Title(language.Afrikaans),
}}
}
// TestShortBuffersAndOverflow runs every bufferTests case through Transform
// while limiting both the destination buffer (dstSize) and the amount of
// source offered per call (srcSize). The pieces written by the successive
// calls are concatenated and compared against the expected output, and the
// error returned by the very first call is compared against firstErr.
func TestShortBuffersAndOverflow(t *testing.T) {
	for idx, tc := range bufferTests {
		testtext.Run(t, tc.desc, func(t *testing.T) {
			var (
				out         []byte
				wrote, read int
				err         error
			)
			dst := make([]byte, tc.dstSize)
			for start := 0; start < len(tc.src); start += read {
				// Offer at most srcSize bytes, clamped to the end of the input.
				end := len(tc.src)
				if limit := start + tc.srcSize; limit < end {
					end = limit
				}
				atEOF := end == len(tc.src)
				wrote, read, err = tc.t.Transform(dst, []byte(tc.src[start:end]), atEOF)
				out = append(out, dst[:wrote]...)
				// Only the error of the first call is specified by the test case.
				if start == 0 && err != tc.firstErr {
					t.Errorf("%d:%s:\n error was %v; want %v", idx, tc.desc, err, tc.firstErr)
					break
				}
			}
			if string(out) != tc.want {
				t.Errorf("%d:%s:\ngot %+q;\nwant %+q", idx, tc.desc, out, tc.want)
			}
			testHandover(t, Caser{tc.t}, tc.src)
		})
	}
}
// TestSpan checks Span for a table of casers and inputs. For each case, want
// is the prefix of src that Span is expected to report as spanned (only its
// length is compared) and err is the error expected from the final call.
//
// Before the final call, Span is also invoked with atEOF set to false on every
// rune-aligned prefix of src that is shorter than want; those calls may only
// return nil or transform.ErrShortSrc.
func TestSpan(t *testing.T) {
	for _, tt := range []struct {
		desc  string
		src   string
		want  string
		atEOF bool
		err   error
		t     Caser
	}{{
		desc:  "und/upper/basic",
		src:   "abcdefg",
		want:  "",
		atEOF: true,
		err:   transform.ErrEndOfSpan,
		t:     Upper(language.Und),
	}, {
		desc:  "und/upper/short src",
		src:   "123É"[:4],
		want:  "123",
		atEOF: false,
		err:   transform.ErrShortSrc,
		t:     Upper(language.Und),
	}, {
		desc:  "und/upper/no error on short",
		src:   "12",
		want:  "12",
		atEOF: false,
		t:     Upper(language.Und),
	}, {
		desc:  "und/lower/basic",
		src:   "ABCDEFG",
		want:  "",
		atEOF: true,
		err:   transform.ErrEndOfSpan,
		t:     Lower(language.Und),
	}, {
		desc:  "und/lower/short src num",
		src:   "123é"[:4],
		want:  "123",
		atEOF: false,
		err:   transform.ErrShortSrc,
		t:     Lower(language.Und),
	}, {
		desc:  "und/lower/short src greek",
		src:   "αβγé"[:7],
		want:  "αβγ",
		atEOF: false,
		err:   transform.ErrShortSrc,
		t:     Lower(language.Und),
	}, {
		desc:  "und/lower/no error on short",
		src:   "12",
		want:  "12",
		atEOF: false,
		t:     Lower(language.Und),
	}, {
		desc:  "und/lower/simple (no final sigma)",
		src:   "ος οσσ",
		want:  "οσ οσσ",
		atEOF: true,
		t:     Lower(language.Und, HandleFinalSigma(false)),
	}, {
		desc:  "und/title/simple (no final sigma)",
		src:   "Οσ Οσσ",
		want:  "Οσ Οσσ",
		atEOF: true,
		t:     Title(language.Und, HandleFinalSigma(false)),
	}, {
		desc: "und/lower/final sigma: no error",
		src:  "οΣ", // Oς
		want: "ο",  // Oς
		err:  transform.ErrEndOfSpan,
		t:    Lower(language.Und),
	}, {
		desc: "und/title/final sigma: no error",
		src:  "ΟΣ", // Oς
		want: "Ο",  // Oς
		err:  transform.ErrEndOfSpan,
		t:    Title(language.Und),
	}, {
		desc: "und/title/final sigma: no short source!",
		src:  "ΟσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσΣ",
		want: "Οσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσσ",
		err:  transform.ErrEndOfSpan,
		t:    Title(language.Und),
	}, {
		desc:  "und/title/clipped UTF-8 rune",
		src:   "Σσ" + string([]byte{0xCF}),
		want:  "Σσ",
		atEOF: false,
		err:   transform.ErrShortSrc,
		t:     Title(language.Und),
	}, {
		desc:  "und/title/clipped UTF-8 rune atEOF",
		src:   "Σσσ" + string([]byte{0xCF}),
		want:  "Σσσ" + string([]byte{0xCF}),
		atEOF: true,
		t:     Title(language.Und),
	}, {
		// A long, already title-cased word is spanned in full.
		desc: "und/title/long string",
		src:  "A" + strings.Repeat("a", maxIgnorable+5),
		want: "A" + strings.Repeat("a", maxIgnorable+5),
		t:    Title(language.Und),
	}, {
		// Already title-cased Cyrillic text is spanned in full.
		desc:  "und/title/cyrillic",
		src:   "При",
		want:  "При",
		atEOF: true,
		t:     Title(language.Und, HandleFinalSigma(false)),
	}, {
		// Note: the choice to change the final sigma at the end in case of
		// too many case ignorables is arbitrary. The main reason for this
		// choice is that it results in simpler code.
		desc: "und/title/final sigma: max ignorables",
		src:  "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
		want: "Οσ" + strings.Repeat(".", maxIgnorable) + "A",
		t:    Title(language.Und),
	}, {
		desc: "el/upper/max ignorables - not implemented",
		src:  "Ο" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0313",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Upper(language.Greek),
	}, {
		desc: "el/upper/too many ignorables - not implemented",
		src:  "Ο" + strings.Repeat("\u0321", maxIgnorable) + "\u0313",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Upper(language.Greek),
	}, {
		desc: "el/upper/short dst",
		src:  "123ο",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Upper(language.Greek),
	}, {
		desc: "lt/lower/max ignorables",
		src:  "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
		want: "i" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0307\u0300",
		t:    Lower(language.Lithuanian),
	}, {
		desc: "lt/lower/isLower",
		src:  "I" + strings.Repeat("\u0321", maxIgnorable) + "\u0300",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Lower(language.Lithuanian),
	}, {
		desc: "lt/lower/not identical",
		src:  "aaaaa\u00cc", // U+00CC LATIN CAPITAL LETTER I GRAVE
		err:  transform.ErrEndOfSpan,
		want: "aaaaa",
		t:    Lower(language.Lithuanian),
	}, {
		desc: "lt/lower/identical",
		src:  "aaaai\u0307\u0300", // i + U+0307 COMBINING DOT ABOVE + U+0300 COMBINING GRAVE ACCENT
		want: "aaaai\u0307\u0300",
		t:    Lower(language.Lithuanian),
	}, {
		desc: "lt/upper/not implemented",
		src:  "I" + strings.Repeat("\u0321", maxIgnorable-1) + "\u0300",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Upper(language.Lithuanian),
	}, {
		desc: "lt/upper/not implemented, ascii",
		src:  "AB",
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Upper(language.Lithuanian),
	}, {
		desc: "nl/title/pre-IJ cutoff",
		src:  " IJ",
		want: " IJ",
		t:    Title(language.Dutch),
	}, {
		desc: "nl/title/mid-IJ cutoff",
		src:  " Ia",
		want: " Ia",
		t:    Title(language.Dutch),
	}, {
		desc: "af/title/apostrophe",
		src:  "'n Bietje",
		want: "'n Bietje",
		t:    Title(language.Afrikaans),
	}, {
		desc: "af/title/apostrophe-incorrect",
		src:  "'N Bietje",
		// The Single_Quote (a MidWord), needs to be retained as unspanned so
		// that a successive call to Transform can detect that N should not be
		// capitalized.
		want: "",
		err:  transform.ErrEndOfSpan,
		t:    Title(language.Afrikaans),
	}} {
		testtext.Run(t, tt.desc, func(t *testing.T) {
			// Span every rune-aligned prefix that is shorter than the expected
			// span. With atEOF false these calls may report ErrShortSrc, but
			// any other error is an early failure.
			for p := 0; p < len(tt.want) && p < len(tt.src); {
				tt.t.Reset()
				n, err := tt.t.Span([]byte(tt.src[:p]), false)
				if err != nil && err != transform.ErrShortSrc {
					t.Errorf("early failure:Span(%+q): %v (%d < %d)", tt.src[:p], err, n, len(tt.want))
					break
				}
				// Advance by the encoded width of the next rune. Decoding in
				// place avoids converting the whole remainder to []rune on
				// every step and moves a single byte past invalid UTF-8.
				_, size := utf8.DecodeRuneInString(tt.src[p:])
				p += size
			}
			tt.t.Reset()
			n, err := tt.t.Span([]byte(tt.src), tt.atEOF)
			if n != len(tt.want) || err != tt.err {
				t.Errorf("Span(%+q, %v): got %d, %v; want %d, %v", tt.src, tt.atEOF, n, err, len(tt.want), tt.err)
			}
			testHandover(t, tt.t, tt.src)
		})
	}
}
// txtASCII is the pure-ASCII input used by BenchmarkCasers: one English
// pangram sentence repeated 50 times.
var txtASCII = strings.Repeat("The quick brown fox jumps over the lazy dog. ", 50)
// txt_vn is a Vietnamese sample text.
// Taken from http://creativecommons.org/licenses/by-sa/3.0/vn/
const txt_vn = `Với các điều kiện sau: Ghi nhận công của tác giả. Nếu bạn sử
dụng, chuyển đổi, hoặc xây dựng dự án từ nội dung được chia sẻ này, bạn phải áp
dụng giấy phép này hoặc một giấy phép khác các điều khoản tương tự như giấy
phép này cho dự án của bạn. Hiểu rằng: Miễn Bất kỳ các điều kiện nào trên đây
cũng thể được miễn bỏ nếu bạn được sự cho phép của người sở hữu bản quyền.
Phạm vi công chúng Khi tác phẩm hoặc bất kỳ chương nào của tác phẩm đã trong
vùng dành cho công chúng theo quy định của pháp luật thì tình trạng của không
bị ảnh hưởng bởi giấy phép trong bất kỳ trường hợp nào.`
// txt_cn is a Chinese sample text.
// Taken from http://creativecommons.org/licenses/by-sa/2.5/cn/
const txt_cn = `您可以自由 复制发行展览表演放映
广播或通过信息网络传播本作品 创作演绎作品
对本作品进行商业性使用 惟须遵守下列条件
署名 您必须按照作者或者许可人指定的方式对作品进行署名
相同方式共享 如果您改变转换本作品或者以本作品为基础进行创作
您只能采用与本协议相同的许可协议发布基于本作品的演绎作品`
// txt_ru is a mostly Russian sample text; it also contains one English word
// and ends with a short Greek phrase.
// Taken from http://creativecommons.org/licenses/by-sa/1.0/deed.ru
const txt_ru = `При обязательном соблюдении следующих условий: Attribution Вы
должны атрибутировать произведение (указывать автора и источник) в порядке,
предусмотренном автором или лицензиаром (но только так, чтобы никоим образом не
подразумевалось, что они поддерживают вас или использование вами данного
произведения). Υπό τις ακόλουθες προϋποθέσεις:`
// txt_gr is a Greek sample text.
// Taken from http://creativecommons.org/licenses/by-sa/3.0/gr/
const txt_gr = `Αναφορά Δημιουργού Θα πρέπει να κάνετε την αναφορά στο έργο με
τον τρόπο που έχει οριστεί από το δημιουργό ή το χορηγούντο την άδεια (χωρίς
όμως να εννοείται με οποιονδήποτε τρόπο ότι εγκρίνουν εσάς ή τη χρήση του έργου
από εσάς). Παρόμοια Διανομή Εάν αλλοιώσετε, τροποποιήσετε ή δημιουργήσετε
περαιτέρω βασισμένοι στο έργο θα μπορείτε να διανέμετε το έργο που θα προκύψει
μόνο με την ίδια ή παρόμοια άδεια.`
// txtNonASCII concatenates the four samples above; BenchmarkCasers uses it as
// its non-ASCII input.
const txtNonASCII = txt_vn + txt_cn + txt_ru + txt_gr
// TODO: Improve ASCII performance.
func BenchmarkCasers(b *testing.B) {
for _, s := range []struct{ name, text string }{
{"ascii", txtASCII},
{"nonASCII", txtNonASCII},
{"short", "При"},
} {
src := []byte(s.text)
// Measure case mappings in bytes package for comparison.
for _, f := range []struct {
name string
fn func(b []byte) []byte
}{
{"lower", bytes.ToLower},
{"title", bytes.ToTitle},
{"upper", bytes.ToUpper},
} {
testtext.Bench(b, path.Join(s.name, "bytes", f.name), func(b *testing.B) {
b.SetBytes(int64(len(src)))
for i := 0; i < b.N; i++ {
f.fn(src)
}
})
}
for _, t := range []struct {
name string
caser transform.SpanningTransformer
}{
{"fold/default", Fold()},
{"upper/default", Upper(language.Und)},
{"lower/sigma", Lower(language.Und)},
{"lower/simple", Lower(language.Und, HandleFinalSigma(false))},
{"title/sigma", Title(language.Und)},
{"title/simple", Title(language.Und, HandleFinalSigma(false))},
} {
c := Caser{t.caser}
dst := make([]byte, len(src))
testtext.Bench(b, path.Join(s.name, t.name, "transform"), func(b *testing.B) {
b.SetBytes(int64(len(src)))
for i := 0; i < b.N; i++ {
c.Reset()
c.Transform(dst, src, true)
}
})
// No need to check span for simple cases, as they will be the same
// as sigma.
if strings.HasSuffix(t.name, "/simple") {
continue
}
spanSrc := c.Bytes(src)
testtext.Bench(b, path.Join(s.name, t.name, "span"), func(b *testing.B) {
c.Reset()
if n, _ := c.Span(spanSrc, true); n < len(spanSrc) {
b.Fatalf("spanner is not recognizing text %q as done (at %d)", spanSrc, n)
}
b.SetBytes(int64(len(spanSrc)))
for i := 0; i < b.N; i++ {
c.Reset()
c.Span(spanSrc, true)
}
})
}
}
}

File diff suppressed because it is too large Load diff

1158
vendor/golang.org/x/text/cases/tables_test.go generated vendored Normal file

File diff suppressed because it is too large Load diff