Update go dependencies
This commit is contained in:
parent
15ffb51394
commit
bb4d483837
1621 changed files with 86368 additions and 284392 deletions
258
vendor/golang.org/x/text/encoding/charmap/charmap_test.go
generated
vendored
258
vendor/golang.org/x/text/encoding/charmap/charmap_test.go
generated
vendored
|
|
@ -1,258 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package charmap
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/enctest"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Decode", e.NewDecoder(), nil
|
||||
}
|
||||
|
||||
func encASCIISuperset(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
|
||||
}
|
||||
|
||||
func encEBCDIC(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Encode", e.NewEncoder(), internal.RepertoireError(0x3f)
|
||||
}
|
||||
|
||||
func TestNonRepertoire(t *testing.T) {
|
||||
testCases := []struct {
|
||||
init func(e encoding.Encoding) (string, transform.Transformer, error)
|
||||
e encoding.Encoding
|
||||
src, want string
|
||||
}{
|
||||
{dec, Windows1252, "\x81", "\ufffd"},
|
||||
|
||||
{encEBCDIC, CodePage037, "갂", ""},
|
||||
|
||||
{encEBCDIC, CodePage1047, "갂", ""},
|
||||
{encEBCDIC, CodePage1047, "a¤갂", "\x81\x9F"},
|
||||
|
||||
{encEBCDIC, CodePage1140, "갂", ""},
|
||||
{encEBCDIC, CodePage1140, "a€갂", "\x81\x9F"},
|
||||
|
||||
{encASCIISuperset, Windows1252, "갂", ""},
|
||||
{encASCIISuperset, Windows1252, "a갂", "a"},
|
||||
{encASCIISuperset, Windows1252, "\u00E9갂", "\xE9"},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
dir, tr, wantErr := tc.init(tc.e)
|
||||
|
||||
dst, _, err := transform.String(tr, tc.src)
|
||||
if err != wantErr {
|
||||
t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
|
||||
}
|
||||
if got := string(dst); got != tc.want {
|
||||
t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBasics(t *testing.T) {
|
||||
testCases := []struct {
|
||||
e encoding.Encoding
|
||||
encoded string
|
||||
utf8 string
|
||||
}{{
|
||||
e: CodePage037,
|
||||
encoded: "\xc8\x51\xba\x93\xcf",
|
||||
utf8: "Hé[lõ",
|
||||
}, {
|
||||
e: CodePage437,
|
||||
encoded: "H\x82ll\x93 \x9d\xa7\xf4\x9c\xbe",
|
||||
utf8: "Héllô ¥º⌠£╛",
|
||||
}, {
|
||||
e: CodePage866,
|
||||
encoded: "H\xf3\xd3o \x98\xfd\x9f\xdd\xa1",
|
||||
utf8: "Hє╙o Ш¤Я▌б",
|
||||
}, {
|
||||
e: CodePage1047,
|
||||
encoded: "\xc8\x54\x93\x93\x9f",
|
||||
utf8: "Hèll¤",
|
||||
}, {
|
||||
e: CodePage1140,
|
||||
encoded: "\xc8\x9f\x93\x93\xcf",
|
||||
utf8: "H€llõ",
|
||||
}, {
|
||||
e: ISO8859_2,
|
||||
encoded: "Hel\xe5\xf5",
|
||||
utf8: "Helĺő",
|
||||
}, {
|
||||
e: ISO8859_3,
|
||||
encoded: "He\xbd\xd4",
|
||||
utf8: "He½Ô",
|
||||
}, {
|
||||
e: ISO8859_4,
|
||||
encoded: "Hel\xb6\xf8",
|
||||
utf8: "Helļø",
|
||||
}, {
|
||||
e: ISO8859_5,
|
||||
encoded: "H\xd7\xc6o",
|
||||
utf8: "HзЦo",
|
||||
}, {
|
||||
e: ISO8859_6,
|
||||
encoded: "Hel\xc2\xc9",
|
||||
utf8: "Helآة",
|
||||
}, {
|
||||
e: ISO8859_7,
|
||||
encoded: "H\xeel\xebo",
|
||||
utf8: "Hξlλo",
|
||||
}, {
|
||||
e: ISO8859_8,
|
||||
encoded: "Hel\xf5\xed",
|
||||
utf8: "Helץם",
|
||||
}, {
|
||||
e: ISO8859_9,
|
||||
encoded: "\xdeayet",
|
||||
utf8: "Şayet",
|
||||
}, {
|
||||
e: ISO8859_10,
|
||||
encoded: "H\xea\xbfo",
|
||||
utf8: "Hęŋo",
|
||||
}, {
|
||||
e: ISO8859_13,
|
||||
encoded: "H\xe6l\xf9o",
|
||||
utf8: "Hęlło",
|
||||
}, {
|
||||
e: ISO8859_14,
|
||||
encoded: "He\xfe\xd0o",
|
||||
utf8: "HeŷŴo",
|
||||
}, {
|
||||
e: ISO8859_15,
|
||||
encoded: "H\xa4ll\xd8",
|
||||
utf8: "H€llØ",
|
||||
}, {
|
||||
e: ISO8859_16,
|
||||
encoded: "H\xe6ll\xbd",
|
||||
utf8: "Hællœ",
|
||||
}, {
|
||||
e: KOI8R,
|
||||
encoded: "He\x93\xad\x9c",
|
||||
utf8: "He⌠╜°",
|
||||
}, {
|
||||
e: KOI8U,
|
||||
encoded: "He\x93\xad\x9c",
|
||||
utf8: "He⌠ґ°",
|
||||
}, {
|
||||
e: Macintosh,
|
||||
encoded: "He\xdf\xd7",
|
||||
utf8: "Hefl◊",
|
||||
}, {
|
||||
e: MacintoshCyrillic,
|
||||
encoded: "He\xbe\x94",
|
||||
utf8: "HeЊФ",
|
||||
}, {
|
||||
e: Windows874,
|
||||
encoded: "He\xb7\xf0",
|
||||
utf8: "Heท๐",
|
||||
}, {
|
||||
e: Windows1250,
|
||||
encoded: "He\xe5\xe5o",
|
||||
utf8: "Heĺĺo",
|
||||
}, {
|
||||
e: Windows1251,
|
||||
encoded: "H\xball\xfe",
|
||||
utf8: "Hєllю",
|
||||
}, {
|
||||
e: Windows1252,
|
||||
encoded: "H\xe9ll\xf4 \xa5\xbA\xae\xa3\xd0",
|
||||
utf8: "Héllô ¥º®£Ð",
|
||||
}, {
|
||||
e: Windows1253,
|
||||
encoded: "H\xe5ll\xd6",
|
||||
utf8: "HεllΦ",
|
||||
}, {
|
||||
e: Windows1254,
|
||||
encoded: "\xd0ello",
|
||||
utf8: "Ğello",
|
||||
}, {
|
||||
e: Windows1255,
|
||||
encoded: "He\xd4o",
|
||||
utf8: "Heװo",
|
||||
}, {
|
||||
e: Windows1256,
|
||||
encoded: "H\xdbllo",
|
||||
utf8: "Hغllo",
|
||||
}, {
|
||||
e: Windows1257,
|
||||
encoded: "He\xeflo",
|
||||
utf8: "Heļlo",
|
||||
}, {
|
||||
e: Windows1258,
|
||||
encoded: "Hell\xf5",
|
||||
utf8: "Hellơ",
|
||||
}, {
|
||||
e: XUserDefined,
|
||||
encoded: "\x00\x40\x7f\x80\xab\xff",
|
||||
utf8: "\u0000\u0040\u007f\uf780\uf7ab\uf7ff",
|
||||
}}
|
||||
|
||||
for _, tc := range testCases {
|
||||
enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
|
||||
}
|
||||
}
|
||||
|
||||
var windows1255TestCases = []struct {
|
||||
b byte
|
||||
ok bool
|
||||
r rune
|
||||
}{
|
||||
{'\x00', true, '\u0000'},
|
||||
{'\x1a', true, '\u001a'},
|
||||
{'\x61', true, '\u0061'},
|
||||
{'\x7f', true, '\u007f'},
|
||||
{'\x80', true, '\u20ac'},
|
||||
{'\x95', true, '\u2022'},
|
||||
{'\xa0', true, '\u00a0'},
|
||||
{'\xc0', true, '\u05b0'},
|
||||
{'\xfc', true, '\ufffd'},
|
||||
{'\xfd', true, '\u200e'},
|
||||
{'\xfe', true, '\u200f'},
|
||||
{'\xff', true, '\ufffd'},
|
||||
{encoding.ASCIISub, false, '\u0400'},
|
||||
{encoding.ASCIISub, false, '\u2603'},
|
||||
{encoding.ASCIISub, false, '\U0001f4a9'},
|
||||
}
|
||||
|
||||
func TestDecodeByte(t *testing.T) {
|
||||
for _, tc := range windows1255TestCases {
|
||||
if !tc.ok {
|
||||
continue
|
||||
}
|
||||
|
||||
got := Windows1255.DecodeByte(tc.b)
|
||||
want := tc.r
|
||||
if got != want {
|
||||
t.Errorf("DecodeByte(%#02x): got %#08x, want %#08x", tc.b, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncodeRune(t *testing.T) {
|
||||
for _, tc := range windows1255TestCases {
|
||||
// There can be multiple tc.b values that map to tc.r = '\ufffd'.
|
||||
if tc.r == '\ufffd' {
|
||||
continue
|
||||
}
|
||||
|
||||
gotB, gotOK := Windows1255.EncodeRune(tc.r)
|
||||
wantB, wantOK := tc.b, tc.ok
|
||||
if gotB != wantB || gotOK != wantOK {
|
||||
t.Errorf("EncodeRune(%#08x): got (%#02x, %t), want (%#02x, %t)", tc.r, gotB, gotOK, wantB, wantOK)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestFiles delegates to enctest.TestFile for Windows1252.
func TestFiles(t *testing.T) {
	enctest.TestFile(t, Windows1252)
}
|
||||
|
||||
// BenchmarkEncoding delegates to enctest.Benchmark for Windows1252.
func BenchmarkEncoding(b *testing.B) {
	enctest.Benchmark(b, Windows1252)
}
|
||||
290
vendor/golang.org/x/text/encoding/encoding_test.go
generated
vendored
290
vendor/golang.org/x/text/encoding/encoding_test.go
generated
vendored
|
|
@ -1,290 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package encoding_test
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func TestEncodeInvalidUTF8(t *testing.T) {
|
||||
inputs := []string{
|
||||
"hello.",
|
||||
"wo\ufffdld.",
|
||||
"ABC\xff\x80\x80", // Invalid UTF-8.
|
||||
"\x80\x80\x80\x80\x80",
|
||||
"\x80\x80D\x80\x80", // Valid rune at "D".
|
||||
"E\xed\xa0\x80\xed\xbf\xbfF", // Two invalid UTF-8 runes (surrogates).
|
||||
"G",
|
||||
"H\xe2\x82", // U+20AC in UTF-8 is "\xe2\x82\xac", which we split over two
|
||||
"\xacI\xe2\x82", // input lines. It maps to 0x80 in the Windows-1252 encoding.
|
||||
}
|
||||
// Each invalid source byte becomes '\x1a'.
|
||||
want := strings.Replace("hello.wo?ld.ABC??????????D??E??????FGH\x80I??", "?", "\x1a", -1)
|
||||
|
||||
transformer := encoding.ReplaceUnsupported(charmap.Windows1252.NewEncoder())
|
||||
gotBuf := make([]byte, 0, 1024)
|
||||
src := make([]byte, 0, 1024)
|
||||
for i, input := range inputs {
|
||||
dst := make([]byte, 1024)
|
||||
src = append(src, input...)
|
||||
atEOF := i == len(inputs)-1
|
||||
nDst, nSrc, err := transformer.Transform(dst, src, atEOF)
|
||||
gotBuf = append(gotBuf, dst[:nDst]...)
|
||||
src = src[nSrc:]
|
||||
if err != nil && err != transform.ErrShortSrc {
|
||||
t.Fatalf("i=%d: %v", i, err)
|
||||
}
|
||||
if atEOF && err != nil {
|
||||
t.Fatalf("i=%d: atEOF: %v", i, err)
|
||||
}
|
||||
}
|
||||
if got := string(gotBuf); got != want {
|
||||
t.Fatalf("\ngot %+q\nwant %+q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplacement(t *testing.T) {
|
||||
for _, direction := range []string{"Decode", "Encode"} {
|
||||
enc, want := (transform.Transformer)(nil), ""
|
||||
if direction == "Decode" {
|
||||
enc = encoding.Replacement.NewDecoder()
|
||||
want = "\ufffd"
|
||||
} else {
|
||||
enc = encoding.Replacement.NewEncoder()
|
||||
want = "AB\x00CD\ufffdYZ"
|
||||
}
|
||||
sr := strings.NewReader("AB\x00CD\x80YZ")
|
||||
g, err := ioutil.ReadAll(transform.NewReader(sr, enc))
|
||||
if err != nil {
|
||||
t.Errorf("%s: ReadAll: %v", direction, err)
|
||||
continue
|
||||
}
|
||||
if got := string(g); got != want {
|
||||
t.Errorf("%s:\ngot %q\nwant %q", direction, got, want)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestUTF8Validator(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
dstSize int
|
||||
src string
|
||||
atEOF bool
|
||||
want string
|
||||
wantErr error
|
||||
}{
|
||||
{
|
||||
"empty input",
|
||||
100,
|
||||
"",
|
||||
false,
|
||||
"",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"valid 1-byte 1-rune input",
|
||||
100,
|
||||
"a",
|
||||
false,
|
||||
"a",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"valid 3-byte 1-rune input",
|
||||
100,
|
||||
"\u1234",
|
||||
false,
|
||||
"\u1234",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"valid 5-byte 3-rune input",
|
||||
100,
|
||||
"a\u0100\u0101",
|
||||
false,
|
||||
"a\u0100\u0101",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"perfectly sized dst (non-ASCII)",
|
||||
5,
|
||||
"a\u0100\u0101",
|
||||
false,
|
||||
"a\u0100\u0101",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"short dst (non-ASCII)",
|
||||
4,
|
||||
"a\u0100\u0101",
|
||||
false,
|
||||
"a\u0100",
|
||||
transform.ErrShortDst,
|
||||
},
|
||||
{
|
||||
"perfectly sized dst (ASCII)",
|
||||
5,
|
||||
"abcde",
|
||||
false,
|
||||
"abcde",
|
||||
nil,
|
||||
},
|
||||
{
|
||||
"short dst (ASCII)",
|
||||
4,
|
||||
"abcde",
|
||||
false,
|
||||
"abcd",
|
||||
transform.ErrShortDst,
|
||||
},
|
||||
{
|
||||
"partial input (!EOF)",
|
||||
100,
|
||||
"a\u0100\xf1",
|
||||
false,
|
||||
"a\u0100",
|
||||
transform.ErrShortSrc,
|
||||
},
|
||||
{
|
||||
"invalid input (EOF)",
|
||||
100,
|
||||
"a\u0100\xf1",
|
||||
true,
|
||||
"a\u0100",
|
||||
encoding.ErrInvalidUTF8,
|
||||
},
|
||||
{
|
||||
"invalid input (!EOF)",
|
||||
100,
|
||||
"a\u0100\x80",
|
||||
false,
|
||||
"a\u0100",
|
||||
encoding.ErrInvalidUTF8,
|
||||
},
|
||||
{
|
||||
"invalid input (above U+10FFFF)",
|
||||
100,
|
||||
"a\u0100\xf7\xbf\xbf\xbf",
|
||||
false,
|
||||
"a\u0100",
|
||||
encoding.ErrInvalidUTF8,
|
||||
},
|
||||
{
|
||||
"invalid input (surrogate half)",
|
||||
100,
|
||||
"a\u0100\xed\xa0\x80",
|
||||
false,
|
||||
"a\u0100",
|
||||
encoding.ErrInvalidUTF8,
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
dst := make([]byte, tc.dstSize)
|
||||
nDst, nSrc, err := encoding.UTF8Validator.Transform(dst, []byte(tc.src), tc.atEOF)
|
||||
if nDst < 0 || len(dst) < nDst {
|
||||
t.Errorf("%s: nDst=%d out of range", tc.desc, nDst)
|
||||
continue
|
||||
}
|
||||
got := string(dst[:nDst])
|
||||
if got != tc.want || nSrc != len(tc.want) || err != tc.wantErr {
|
||||
t.Errorf("%s:\ngot %+q, %d, %v\nwant %+q, %d, %v",
|
||||
tc.desc, got, nSrc, err, tc.want, len(tc.want), tc.wantErr)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestErrorHandler(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
handler func(*encoding.Encoder) *encoding.Encoder
|
||||
sizeDst int
|
||||
src, want string
|
||||
nSrc int
|
||||
err error
|
||||
}{
|
||||
{
|
||||
desc: "one rune replacement",
|
||||
handler: encoding.ReplaceUnsupported,
|
||||
sizeDst: 100,
|
||||
src: "\uAC00",
|
||||
want: "\x1a",
|
||||
nSrc: 3,
|
||||
},
|
||||
{
|
||||
desc: "mid-stream rune replacement",
|
||||
handler: encoding.ReplaceUnsupported,
|
||||
sizeDst: 100,
|
||||
src: "a\uAC00bcd\u00e9",
|
||||
want: "a\x1abcd\xe9",
|
||||
nSrc: 9,
|
||||
},
|
||||
{
|
||||
desc: "at end rune replacement",
|
||||
handler: encoding.ReplaceUnsupported,
|
||||
sizeDst: 10,
|
||||
src: "\u00e9\uAC00",
|
||||
want: "\xe9\x1a",
|
||||
nSrc: 5,
|
||||
},
|
||||
{
|
||||
desc: "short buffer replacement",
|
||||
handler: encoding.ReplaceUnsupported,
|
||||
sizeDst: 1,
|
||||
src: "\u00e9\uAC00",
|
||||
want: "\xe9",
|
||||
nSrc: 2,
|
||||
err: transform.ErrShortDst,
|
||||
},
|
||||
{
|
||||
desc: "one rune html escape",
|
||||
handler: encoding.HTMLEscapeUnsupported,
|
||||
sizeDst: 100,
|
||||
src: "\uAC00",
|
||||
want: "가",
|
||||
nSrc: 3,
|
||||
},
|
||||
{
|
||||
desc: "mid-stream html escape",
|
||||
handler: encoding.HTMLEscapeUnsupported,
|
||||
sizeDst: 100,
|
||||
src: "\u00e9\uAC00dcba",
|
||||
want: "\xe9가dcba",
|
||||
nSrc: 9,
|
||||
},
|
||||
{
|
||||
desc: "short buffer html escape",
|
||||
handler: encoding.HTMLEscapeUnsupported,
|
||||
sizeDst: 9,
|
||||
src: "ab\uAC01",
|
||||
want: "ab",
|
||||
nSrc: 2,
|
||||
err: transform.ErrShortDst,
|
||||
},
|
||||
}
|
||||
for i, tc := range testCases {
|
||||
tr := tc.handler(charmap.Windows1250.NewEncoder())
|
||||
b := make([]byte, tc.sizeDst)
|
||||
nDst, nSrc, err := tr.Transform(b, []byte(tc.src), true)
|
||||
if err != tc.err {
|
||||
t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
|
||||
}
|
||||
if got := string(b[:nDst]); got != tc.want {
|
||||
t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
|
||||
}
|
||||
if nSrc != tc.nSrc {
|
||||
t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
42
vendor/golang.org/x/text/encoding/example_test.go
generated
vendored
42
vendor/golang.org/x/text/encoding/example_test.go
generated
vendored
|
|
@ -1,42 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package encoding_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func ExampleDecodeWindows1252() {
|
||||
sr := strings.NewReader("Gar\xe7on !")
|
||||
tr := charmap.Windows1252.NewDecoder().Reader(sr)
|
||||
io.Copy(os.Stdout, tr)
|
||||
// Output: Garçon !
|
||||
}
|
||||
|
||||
func ExampleUTF8Validator() {
|
||||
for i := 0; i < 2; i++ {
|
||||
var transformer transform.Transformer
|
||||
transformer = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM).NewEncoder()
|
||||
if i == 1 {
|
||||
transformer = transform.Chain(encoding.UTF8Validator, transformer)
|
||||
}
|
||||
dst := make([]byte, 256)
|
||||
src := []byte("abc\xffxyz") // src is invalid UTF-8.
|
||||
nDst, nSrc, err := transformer.Transform(dst, src, true)
|
||||
fmt.Printf("i=%d: produced %q, consumed %q, error %v\n",
|
||||
i, dst[:nDst], src[:nSrc], err)
|
||||
}
|
||||
// Output:
|
||||
// i=0: produced "\x00a\x00b\x00c\xff\xfd\x00x\x00y\x00z", consumed "abc\xffxyz", error <nil>
|
||||
// i=1: produced "\x00a\x00b\x00c", consumed "abc", error encoding: invalid UTF-8
|
||||
}
|
||||
144
vendor/golang.org/x/text/encoding/htmlindex/htmlindex_test.go
generated
vendored
144
vendor/golang.org/x/text/encoding/htmlindex/htmlindex_test.go
generated
vendored
|
|
@ -1,144 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package htmlindex
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
func TestGet(t *testing.T) {
|
||||
for i, tc := range []struct {
|
||||
name string
|
||||
canonical string
|
||||
err error
|
||||
}{
|
||||
{"utf-8", "utf-8", nil},
|
||||
{" utf-8 ", "utf-8", nil},
|
||||
{" l5 ", "windows-1254", nil},
|
||||
{"latin5 ", "windows-1254", nil},
|
||||
{"latin 5", "", errInvalidName},
|
||||
{"latin-5", "", errInvalidName},
|
||||
} {
|
||||
enc, err := Get(tc.name)
|
||||
if err != tc.err {
|
||||
t.Errorf("%d: error was %v; want %v", i, err, tc.err)
|
||||
}
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if got, err := Name(enc); got != tc.canonical {
|
||||
t.Errorf("%d: Name(Get(%q)) = %q; want %q (%v)", i, tc.name, got, tc.canonical, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTables(t *testing.T) {
|
||||
for name, index := range nameMap {
|
||||
got, err := Get(name)
|
||||
if err != nil {
|
||||
t.Errorf("%s:err: expected non-nil error", name)
|
||||
}
|
||||
if want := encodings[index]; got != want {
|
||||
t.Errorf("%s:encoding: got %v; want %v", name, got, want)
|
||||
}
|
||||
mib, _ := got.(identifier.Interface).ID()
|
||||
if mibMap[mib] != index {
|
||||
t.Errorf("%s:mibMab: got %d; want %d", name, mibMap[mib], index)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestName(t *testing.T) {
|
||||
for i, tc := range []struct {
|
||||
desc string
|
||||
enc encoding.Encoding
|
||||
name string
|
||||
err error
|
||||
}{{
|
||||
"defined encoding",
|
||||
charmap.ISO8859_2,
|
||||
"iso-8859-2",
|
||||
nil,
|
||||
}, {
|
||||
"defined Unicode encoding",
|
||||
unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
|
||||
"utf-16be",
|
||||
nil,
|
||||
}, {
|
||||
"undefined Unicode encoding in HTML standard",
|
||||
unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
|
||||
"",
|
||||
errUnsupported,
|
||||
}, {
|
||||
"undefined other encoding in HTML standard",
|
||||
charmap.CodePage437,
|
||||
"",
|
||||
errUnsupported,
|
||||
}, {
|
||||
"unknown encoding",
|
||||
encoding.Nop,
|
||||
"",
|
||||
errUnknown,
|
||||
}} {
|
||||
name, err := Name(tc.enc)
|
||||
if name != tc.name || err != tc.err {
|
||||
t.Errorf("%d:%s: got %q, %v; want %q, %v", i, tc.desc, name, err, tc.name, tc.err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLanguageDefault(t *testing.T) {
|
||||
for _, tc := range []struct{ tag, want string }{
|
||||
{"und", "windows-1252"}, // The default value.
|
||||
{"ar", "windows-1256"},
|
||||
{"ba", "windows-1251"},
|
||||
{"be", "windows-1251"},
|
||||
{"bg", "windows-1251"},
|
||||
{"cs", "windows-1250"},
|
||||
{"el", "iso-8859-7"},
|
||||
{"et", "windows-1257"},
|
||||
{"fa", "windows-1256"},
|
||||
{"he", "windows-1255"},
|
||||
{"hr", "windows-1250"},
|
||||
{"hu", "iso-8859-2"},
|
||||
{"ja", "shift_jis"},
|
||||
{"kk", "windows-1251"},
|
||||
{"ko", "euc-kr"},
|
||||
{"ku", "windows-1254"},
|
||||
{"ky", "windows-1251"},
|
||||
{"lt", "windows-1257"},
|
||||
{"lv", "windows-1257"},
|
||||
{"mk", "windows-1251"},
|
||||
{"pl", "iso-8859-2"},
|
||||
{"ru", "windows-1251"},
|
||||
{"sah", "windows-1251"},
|
||||
{"sk", "windows-1250"},
|
||||
{"sl", "iso-8859-2"},
|
||||
{"sr", "windows-1251"},
|
||||
{"tg", "windows-1251"},
|
||||
{"th", "windows-874"},
|
||||
{"tr", "windows-1254"},
|
||||
{"tt", "windows-1251"},
|
||||
{"uk", "windows-1251"},
|
||||
{"vi", "windows-1258"},
|
||||
{"zh-hans", "gb18030"},
|
||||
{"zh-hant", "big5"},
|
||||
// Variants and close approximates of the above.
|
||||
{"ar_EG", "windows-1256"},
|
||||
{"bs", "windows-1250"}, // Bosnian Latin maps to Croatian.
|
||||
// Use default fallback in case of miss.
|
||||
{"nl", "windows-1252"},
|
||||
} {
|
||||
if got := LanguageDefault(language.MustParse(tc.tag)); got != tc.want {
|
||||
t.Errorf("LanguageDefault(%s) = %s; want %s", tc.tag, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
248
vendor/golang.org/x/text/encoding/japanese/all_test.go
generated
vendored
248
vendor/golang.org/x/text/encoding/japanese/all_test.go
generated
vendored
|
|
@ -1,248 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package japanese
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/enctest"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Decode", e.NewDecoder(), nil
|
||||
}
|
||||
func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
|
||||
}
|
||||
|
||||
func TestNonRepertoire(t *testing.T) {
|
||||
// Pick n to cause the destination buffer in transform.String to overflow.
|
||||
const n = 100
|
||||
long := strings.Repeat(".", n)
|
||||
testCases := []struct {
|
||||
init func(e encoding.Encoding) (string, transform.Transformer, error)
|
||||
e encoding.Encoding
|
||||
src, want string
|
||||
}{
|
||||
{enc, EUCJP, "갂", ""},
|
||||
{enc, EUCJP, "a갂", "a"},
|
||||
{enc, EUCJP, "丌갂", "\x8f\xb0\xa4"},
|
||||
|
||||
{enc, ISO2022JP, "갂", ""},
|
||||
{enc, ISO2022JP, "a갂", "a"},
|
||||
{enc, ISO2022JP, "朗갂", "\x1b$BzF\x1b(B"}, // switch back to ASCII mode at end
|
||||
|
||||
{enc, ShiftJIS, "갂", ""},
|
||||
{enc, ShiftJIS, "a갂", "a"},
|
||||
{enc, ShiftJIS, "\u2190갂", "\x81\xa9"},
|
||||
|
||||
// Continue correctly after errors
|
||||
{dec, EUCJP, "\x8e\xa0", "\ufffd\ufffd"},
|
||||
{dec, EUCJP, "\x8e\xe0", "\ufffd"},
|
||||
{dec, EUCJP, "\x8e\xff", "\ufffd\ufffd"},
|
||||
{dec, EUCJP, "\x8ea", "\ufffda"},
|
||||
{dec, EUCJP, "\x8f\xa0", "\ufffd\ufffd"},
|
||||
{dec, EUCJP, "\x8f\xa1\xa0", "\ufffd\ufffd"},
|
||||
{dec, EUCJP, "\x8f\xa1a", "\ufffda"},
|
||||
{dec, EUCJP, "\x8f\xa1a", "\ufffda"},
|
||||
{dec, EUCJP, "\x8f\xa1a", "\ufffda"},
|
||||
{dec, EUCJP, "\x8f\xa2\xa2", "\ufffd"},
|
||||
{dec, EUCJP, "\xfe", "\ufffd"},
|
||||
{dec, EUCJP, "\xfe\xfc", "\ufffd"},
|
||||
{dec, EUCJP, "\xfe\xff", "\ufffd\ufffd"},
|
||||
// Correct handling of end of source
|
||||
{dec, EUCJP, strings.Repeat("\x8e", n), strings.Repeat("\ufffd", n)},
|
||||
{dec, EUCJP, strings.Repeat("\x8f", n), strings.Repeat("\ufffd", n)},
|
||||
{dec, EUCJP, strings.Repeat("\x8f\xa0", n), strings.Repeat("\ufffd", 2*n)},
|
||||
{dec, EUCJP, "a" + strings.Repeat("\x8f\xa1", n), "a" + strings.Repeat("\ufffd", n)},
|
||||
{dec, EUCJP, "a" + strings.Repeat("\x8f\xa1\xff", n), "a" + strings.Repeat("\ufffd", 2*n)},
|
||||
|
||||
// Continue correctly after errors
|
||||
{dec, ShiftJIS, "\x80", "\u0080"}, // It's what the spec says.
|
||||
{dec, ShiftJIS, "\x81", "\ufffd"},
|
||||
{dec, ShiftJIS, "\x81\x7f", "\ufffd\u007f"},
|
||||
{dec, ShiftJIS, "\xe0", "\ufffd"},
|
||||
{dec, ShiftJIS, "\xe0\x39", "\ufffd\u0039"},
|
||||
{dec, ShiftJIS, "\xe0\x9f", "燹"},
|
||||
{dec, ShiftJIS, "\xe0\xfd", "\ufffd"},
|
||||
{dec, ShiftJIS, "\xef\xfc", "\ufffd"},
|
||||
{dec, ShiftJIS, "\xfc\xfc", "\ufffd"},
|
||||
{dec, ShiftJIS, "\xfc\xfd", "\ufffd"},
|
||||
{dec, ShiftJIS, "\xfdaa", "\ufffdaa"},
|
||||
|
||||
{dec, ShiftJIS, strings.Repeat("\x81\x81", n), strings.Repeat("=", n)},
|
||||
{dec, ShiftJIS, strings.Repeat("\xe0\xfd", n), strings.Repeat("\ufffd", n)},
|
||||
{dec, ShiftJIS, "a" + strings.Repeat("\xe0\xfd", n), "a" + strings.Repeat("\ufffd", n)},
|
||||
|
||||
{dec, ISO2022JP, "\x1b$", "\ufffd$"},
|
||||
{dec, ISO2022JP, "\x1b(", "\ufffd("},
|
||||
{dec, ISO2022JP, "\x1b@", "\ufffd@"},
|
||||
{dec, ISO2022JP, "\x1bZ", "\ufffdZ"},
|
||||
// incomplete escapes
|
||||
{dec, ISO2022JP, "\x1b$", "\ufffd$"},
|
||||
{dec, ISO2022JP, "\x1b$J.", "\ufffd$J."}, // illegal
|
||||
{dec, ISO2022JP, "\x1b$B.", "\ufffd"}, // JIS208
|
||||
{dec, ISO2022JP, "\x1b$(", "\ufffd$("}, // JIS212
|
||||
{dec, ISO2022JP, "\x1b$(..", "\ufffd$(.."}, // JIS212
|
||||
{dec, ISO2022JP, "\x1b$(" + long, "\ufffd$(" + long}, // JIS212
|
||||
{dec, ISO2022JP, "\x1b$(D.", "\ufffd"}, // JIS212
|
||||
{dec, ISO2022JP, "\x1b$(D..", "\ufffd"}, // JIS212
|
||||
{dec, ISO2022JP, "\x1b$(D...", "\ufffd\ufffd"}, // JIS212
|
||||
{dec, ISO2022JP, "\x1b(B.", "."}, // ascii
|
||||
{dec, ISO2022JP, "\x1b(B..", ".."}, // ascii
|
||||
{dec, ISO2022JP, "\x1b(J.", "."}, // roman
|
||||
{dec, ISO2022JP, "\x1b(J..", ".."}, // roman
|
||||
{dec, ISO2022JP, "\x1b(I\x20", "\ufffd"}, // katakana
|
||||
{dec, ISO2022JP, "\x1b(I\x20\x20", "\ufffd\ufffd"}, // katakana
|
||||
// recover to same state
|
||||
{dec, ISO2022JP, "\x1b(B\x1b.", "\ufffd."},
|
||||
{dec, ISO2022JP, "\x1b(I\x1b.", "\ufffdョ"},
|
||||
{dec, ISO2022JP, "\x1b(I\x1b$.", "\ufffd、ョ"},
|
||||
{dec, ISO2022JP, "\x1b(I\x1b(.", "\ufffdィョ"},
|
||||
{dec, ISO2022JP, "\x1b$B\x7e\x7e", "\ufffd"},
|
||||
{dec, ISO2022JP, "\x1b$@\x0a.", "\x0a."},
|
||||
{dec, ISO2022JP, "\x1b$B\x0a.", "\x0a."},
|
||||
{dec, ISO2022JP, "\x1b$(D\x0a.", "\x0a."},
|
||||
{dec, ISO2022JP, "\x1b$(D\x7e\x7e", "\ufffd"},
|
||||
{dec, ISO2022JP, "\x80", "\ufffd"},
|
||||
|
||||
// TODO: according to https://encoding.spec.whatwg.org/#iso-2022-jp,
|
||||
// these should all be correct.
|
||||
// {dec, ISO2022JP, "\x1b(B\x0E", "\ufffd"},
|
||||
// {dec, ISO2022JP, "\x1b(B\x0F", "\ufffd"},
|
||||
{dec, ISO2022JP, "\x1b(B\x5C", "\u005C"},
|
||||
{dec, ISO2022JP, "\x1b(B\x7E", "\u007E"},
|
||||
// {dec, ISO2022JP, "\x1b(J\x0E", "\ufffd"},
|
||||
// {dec, ISO2022JP, "\x1b(J\x0F", "\ufffd"},
|
||||
// {dec, ISO2022JP, "\x1b(J\x5C", "\u00A5"},
|
||||
// {dec, ISO2022JP, "\x1b(J\x7E", "\u203E"},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
dir, tr, wantErr := tc.init(tc.e)
|
||||
t.Run(fmt.Sprintf("%s/%v/%q", dir, tc.e, tc.src), func(t *testing.T) {
|
||||
dst := make([]byte, 100000)
|
||||
src := []byte(tc.src)
|
||||
for i := 0; i <= len(tc.src); i++ {
|
||||
nDst, nSrc, err := tr.Transform(dst, src[:i], false)
|
||||
if err != nil && err != transform.ErrShortSrc && err != wantErr {
|
||||
t.Fatalf("error on first call to Transform: %v", err)
|
||||
}
|
||||
n, _, err := tr.Transform(dst[nDst:], src[nSrc:], true)
|
||||
nDst += n
|
||||
if err != wantErr {
|
||||
t.Fatalf("(%q|%q): got %v; want %v", tc.src[:i], tc.src[i:], err, wantErr)
|
||||
}
|
||||
if got := string(dst[:nDst]); got != tc.want {
|
||||
t.Errorf("(%q|%q):\ngot %q\nwant %q", tc.src[:i], tc.src[i:], got, tc.want)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCorrect(t *testing.T) {
|
||||
testCases := []struct {
|
||||
init func(e encoding.Encoding) (string, transform.Transformer, error)
|
||||
e encoding.Encoding
|
||||
src, want string
|
||||
}{
|
||||
{dec, ShiftJIS, "\x9f\xfc", "滌"},
|
||||
{dec, ShiftJIS, "\xfb\xfc", "髙"},
|
||||
{dec, ShiftJIS, "\xfa\xb1", "﨑"},
|
||||
{enc, ShiftJIS, "滌", "\x9f\xfc"},
|
||||
{enc, ShiftJIS, "﨑", "\xed\x95"},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
dir, tr, _ := tc.init(tc.e)
|
||||
|
||||
dst, _, err := transform.String(tr, tc.src)
|
||||
if err != nil {
|
||||
t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, nil)
|
||||
}
|
||||
if got := string(dst); got != tc.want {
|
||||
t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBasics(t *testing.T) {
|
||||
// The encoded forms can be verified by the iconv program:
|
||||
// $ echo 月日は百代 | iconv -f UTF-8 -t SHIFT-JIS | xxd
|
||||
testCases := []struct {
|
||||
e encoding.Encoding
|
||||
encPrefix string
|
||||
encSuffix string
|
||||
encoded string
|
||||
utf8 string
|
||||
}{{
|
||||
// "A。カ゚ 0208: etc 0212: etc" is a nonsense string that contains ASCII, half-width
|
||||
// kana, JIS X 0208 (including two near the kink in the Shift JIS second byte
|
||||
// encoding) and JIS X 0212 encodable codepoints.
|
||||
//
|
||||
// "月日は百代の過客にして、行かふ年も又旅人也。" is from the 17th century poem
|
||||
// "Oku no Hosomichi" and contains both hiragana and kanji.
|
||||
e: EUCJP,
|
||||
encoded: "A\x8e\xa1\x8e\xb6\x8e\xdf " +
|
||||
"0208: \xa1\xa1\xa1\xa2\xa1\xdf\xa1\xe0\xa1\xfd\xa1\xfe\xa2\xa1\xa2\xa2\xf4\xa6 " +
|
||||
"0212: \x8f\xa2\xaf\x8f\xed\xe3",
|
||||
utf8: "A。カ゚ " +
|
||||
"0208: \u3000\u3001\u00d7\u00f7\u25ce\u25c7\u25c6\u25a1\u7199 " +
|
||||
"0212: \u02d8\u9fa5",
|
||||
}, {
|
||||
e: EUCJP,
|
||||
encoded: "\xb7\xee\xc6\xfc\xa4\xcf\xc9\xb4\xc2\xe5\xa4\xce\xb2\xe1\xb5\xd2" +
|
||||
"\xa4\xcb\xa4\xb7\xa4\xc6\xa1\xa2\xb9\xd4\xa4\xab\xa4\xd5\xc7\xaf" +
|
||||
"\xa4\xe2\xcb\xf4\xce\xb9\xbf\xcd\xcc\xe9\xa1\xa3",
|
||||
utf8: "月日は百代の過客にして、行かふ年も又旅人也。",
|
||||
}, {
|
||||
e: ISO2022JP,
|
||||
encSuffix: "\x1b\x28\x42",
|
||||
encoded: "\x1b\x28\x49\x21\x36\x5f\x1b\x28\x42 " +
|
||||
"0208: \x1b\x24\x42\x21\x21\x21\x22\x21\x5f\x21\x60\x21\x7d\x21\x7e\x22\x21\x22\x22\x74\x26",
|
||||
utf8: "。カ゚ " +
|
||||
"0208: \u3000\u3001\u00d7\u00f7\u25ce\u25c7\u25c6\u25a1\u7199",
|
||||
}, {
|
||||
e: ISO2022JP,
|
||||
encPrefix: "\x1b\x24\x42",
|
||||
encSuffix: "\x1b\x28\x42",
|
||||
encoded: "\x37\x6e\x46\x7c\x24\x4f\x49\x34\x42\x65\x24\x4e\x32\x61\x35\x52" +
|
||||
"\x24\x4b\x24\x37\x24\x46\x21\x22\x39\x54\x24\x2b\x24\x55\x47\x2f" +
|
||||
"\x24\x62\x4b\x74\x4e\x39\x3f\x4d\x4c\x69\x21\x23",
|
||||
utf8: "月日は百代の過客にして、行かふ年も又旅人也。",
|
||||
}, {
|
||||
e: ShiftJIS,
|
||||
encoded: "A\xa1\xb6\xdf " +
|
||||
"0208: \x81\x40\x81\x41\x81\x7e\x81\x80\x81\x9d\x81\x9e\x81\x9f\x81\xa0\xea\xa4",
|
||||
utf8: "A。カ゚ " +
|
||||
"0208: \u3000\u3001\u00d7\u00f7\u25ce\u25c7\u25c6\u25a1\u7199",
|
||||
}, {
|
||||
e: ShiftJIS,
|
||||
encoded: "\x8c\x8e\x93\xfa\x82\xcd\x95\x53\x91\xe3\x82\xcc\x89\xdf\x8b\x71" +
|
||||
"\x82\xc9\x82\xb5\x82\xc4\x81\x41\x8d\x73\x82\xa9\x82\xd3\x94\x4e" +
|
||||
"\x82\xe0\x96\x94\x97\xb7\x90\x6c\x96\xe7\x81\x42",
|
||||
utf8: "月日は百代の過客にして、行かふ年も又旅人也。",
|
||||
}}
|
||||
|
||||
for _, tc := range testCases {
|
||||
enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, tc.encSuffix)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFiles(t *testing.T) {
|
||||
enctest.TestFile(t, EUCJP)
|
||||
enctest.TestFile(t, ISO2022JP)
|
||||
enctest.TestFile(t, ShiftJIS)
|
||||
}
|
||||
|
||||
func BenchmarkEncoding(b *testing.B) {
|
||||
enctest.Benchmark(b, EUCJP)
|
||||
enctest.Benchmark(b, ISO2022JP)
|
||||
enctest.Benchmark(b, ShiftJIS)
|
||||
}
|
||||
94
vendor/golang.org/x/text/encoding/korean/all_test.go
generated
vendored
94
vendor/golang.org/x/text/encoding/korean/all_test.go
generated
vendored
|
|
@ -1,94 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package korean
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/enctest"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Decode", e.NewDecoder(), nil
|
||||
}
|
||||
func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
|
||||
}
|
||||
|
||||
func TestNonRepertoire(t *testing.T) {
|
||||
// Pick n large enough to cause an overflow in the destination buffer of
|
||||
// transform.String.
|
||||
const n = 10000
|
||||
testCases := []struct {
|
||||
init func(e encoding.Encoding) (string, transform.Transformer, error)
|
||||
e encoding.Encoding
|
||||
src, want string
|
||||
}{
|
||||
{dec, EUCKR, "\xfe\xfe", "\ufffd"},
|
||||
// {dec, EUCKR, "א", "\ufffd"}, // TODO: why is this different?
|
||||
|
||||
{enc, EUCKR, "א", ""},
|
||||
{enc, EUCKR, "aא", "a"},
|
||||
{enc, EUCKR, "\uac00א", "\xb0\xa1"},
|
||||
// TODO: should we also handle Jamo?
|
||||
|
||||
{dec, EUCKR, "\x80", "\ufffd"},
|
||||
{dec, EUCKR, "\xff", "\ufffd"},
|
||||
{dec, EUCKR, "\x81", "\ufffd"},
|
||||
{dec, EUCKR, "\xb0\x40", "\ufffd@"},
|
||||
{dec, EUCKR, "\xb0\xff", "\ufffd"},
|
||||
{dec, EUCKR, "\xd0\x20", "\ufffd "},
|
||||
{dec, EUCKR, "\xd0\xff", "\ufffd"},
|
||||
|
||||
{dec, EUCKR, strings.Repeat("\x81", n), strings.Repeat("걖", n/2)},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
dir, tr, wantErr := tc.init(tc.e)
|
||||
|
||||
dst, _, err := transform.String(tr, tc.src)
|
||||
if err != wantErr {
|
||||
t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
|
||||
}
|
||||
if got := string(dst); got != tc.want {
|
||||
t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBasics(t *testing.T) {
|
||||
// The encoded forms can be verified by the iconv program:
|
||||
// $ echo 月日は百代 | iconv -f UTF-8 -t SHIFT-JIS | xxd
|
||||
testCases := []struct {
|
||||
e encoding.Encoding
|
||||
encoded string
|
||||
utf8 string
|
||||
}{{
|
||||
// Korean tests.
|
||||
//
|
||||
// "A\uac02\uac35\uac56\ud401B\ud408\ud620\ud624C\u4f3d\u8a70D" is a
|
||||
// nonsense string that contains ASCII, Hangul and CJK ideographs.
|
||||
//
|
||||
// "세계야, 안녕" translates as "Hello, world".
|
||||
e: EUCKR,
|
||||
encoded: "A\x81\x41\x81\x61\x81\x81\xc6\xfeB\xc7\xa1\xc7\xfe\xc8\xa1C\xca\xa1\xfd\xfeD",
|
||||
utf8: "A\uac02\uac35\uac56\ud401B\ud408\ud620\ud624C\u4f3d\u8a70D",
|
||||
}, {
|
||||
e: EUCKR,
|
||||
encoded: "\xbc\xbc\xb0\xe8\xbe\xdf\x2c\x20\xbe\xc8\xb3\xe7",
|
||||
utf8: "세계야, 안녕",
|
||||
}}
|
||||
|
||||
for _, tc := range testCases {
|
||||
enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
|
||||
}
|
||||
}
|
||||
|
||||
// TestFiles invokes enctest.TestFile for EUCKR.
func TestFiles(t *testing.T) {
	enctest.TestFile(t, EUCKR)
}
|
||||
|
||||
// BenchmarkEncoding invokes enctest.Benchmark for EUCKR.
func BenchmarkEncoding(b *testing.B) {
	enctest.Benchmark(b, EUCKR)
}
|
||||
143
vendor/golang.org/x/text/encoding/simplifiedchinese/all_test.go
generated
vendored
143
vendor/golang.org/x/text/encoding/simplifiedchinese/all_test.go
generated
vendored
|
|
@ -1,143 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package simplifiedchinese
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/enctest"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Decode", e.NewDecoder(), nil
|
||||
}
|
||||
func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
|
||||
}
|
||||
|
||||
func TestNonRepertoire(t *testing.T) {
|
||||
// Pick n large enough to overflow the destination buffer of transform.String.
|
||||
const n = 10000
|
||||
testCases := []struct {
|
||||
init func(e encoding.Encoding) (string, transform.Transformer, error)
|
||||
e encoding.Encoding
|
||||
src, want string
|
||||
}{
|
||||
{dec, GBK, "a\xfe\xfeb", "a\ufffdb"},
|
||||
{dec, HZGB2312, "~{z~", "\ufffd"},
|
||||
|
||||
{enc, GBK, "갂", ""},
|
||||
{enc, GBK, "a갂", "a"},
|
||||
{enc, GBK, "\u4e02갂", "\x81@"},
|
||||
|
||||
{enc, HZGB2312, "갂", ""},
|
||||
{enc, HZGB2312, "a갂", "a"},
|
||||
{enc, HZGB2312, "\u6cf5갂", "~{1C~}"},
|
||||
|
||||
{dec, GB18030, "\x80", "€"},
|
||||
{dec, GB18030, "\x81", "\ufffd"},
|
||||
{dec, GB18030, "\x81\x20", "\ufffd "},
|
||||
{dec, GB18030, "\xfe\xfe", "\ufffd"},
|
||||
{dec, GB18030, "\xfe\xff", "\ufffd\ufffd"},
|
||||
{dec, GB18030, "\xfe\x30", "\ufffd0"},
|
||||
{dec, GB18030, "\xfe\x30\x30 ", "\ufffd00 "},
|
||||
{dec, GB18030, "\xfe\x30\xff ", "\ufffd0\ufffd "},
|
||||
{dec, GB18030, "\xfe\x30\x81\x21", "\ufffd0\ufffd!"},
|
||||
|
||||
{dec, GB18030, strings.Repeat("\xfe\x30", n), strings.Repeat("\ufffd0", n)},
|
||||
|
||||
{dec, HZGB2312, "~/", "\ufffd"},
|
||||
{dec, HZGB2312, "~{a\x80", "\ufffd"},
|
||||
{dec, HZGB2312, "~{a\x80", "\ufffd"},
|
||||
{dec, HZGB2312, "~{" + strings.Repeat("z~", n), strings.Repeat("\ufffd", n)},
|
||||
{dec, HZGB2312, "~{" + strings.Repeat("\xfe\x30", n), strings.Repeat("\ufffd", n*2)},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
dir, tr, wantErr := tc.init(tc.e)
|
||||
|
||||
dst, _, err := transform.String(tr, tc.src)
|
||||
if err != wantErr {
|
||||
t.Errorf("%s %v(%q): got %v; want %v", dir, tc.e, tc.src, err, wantErr)
|
||||
}
|
||||
if got := string(dst); got != tc.want {
|
||||
t.Errorf("%s %v(%q):\ngot %q\nwant %q", dir, tc.e, tc.src, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBasics(t *testing.T) {
|
||||
// The encoded forms can be verified by the iconv program:
|
||||
// $ echo 月日は百代 | iconv -f UTF-8 -t SHIFT-JIS | xxd
|
||||
testCases := []struct {
|
||||
e encoding.Encoding
|
||||
encPrefix string
|
||||
encoded string
|
||||
utf8 string
|
||||
}{{
|
||||
// "\u0081\u00de\u00df\u00e0\u00e1\u00e2\u00e3\uffff\U00010000" is a
|
||||
// nonsense string that contains GB18030 encodable codepoints of which
|
||||
// only U+00E0 and U+00E1 are GBK encodable.
|
||||
//
|
||||
// "A\u3000\u554a\u4e02\u4e90\u72dc\u7349\u02ca\u2588Z€" is a nonsense
|
||||
// string that contains ASCII and GBK encodable codepoints from Levels
|
||||
// 1-5 as well as the Euro sign.
|
||||
//
|
||||
// "A\u43f0\u4c32\U00027267\u3000\U0002910d\u79d4Z€" is a nonsense string
|
||||
// that contains ASCII and Big5 encodable codepoints from the Basic
|
||||
// Multilingual Plane and the Supplementary Ideographic Plane as well as
|
||||
// the Euro sign.
|
||||
//
|
||||
// "花间一壶酒,独酌无相亲。" (simplified) and
|
||||
// "花間一壺酒,獨酌無相親。" (traditional)
|
||||
// are from the 8th century poem "Yuè Xià Dú Zhuó".
|
||||
e: GB18030,
|
||||
encoded: "\x81\x30\x81\x31\x81\x30\x89\x37\x81\x30\x89\x38\xa8\xa4\xa8\xa2" +
|
||||
"\x81\x30\x89\x39\x81\x30\x8a\x30\x84\x31\xa4\x39\x90\x30\x81\x30",
|
||||
utf8: "\u0081\u00de\u00df\u00e0\u00e1\u00e2\u00e3\uffff\U00010000",
|
||||
}, {
|
||||
e: GB18030,
|
||||
encoded: "\xbb\xa8\xbc\xe4\xd2\xbb\xba\xf8\xbe\xc6\xa3\xac\xb6\xc0\xd7\xc3" +
|
||||
"\xce\xde\xcf\xe0\xc7\xd7\xa1\xa3",
|
||||
utf8: "花间一壶酒,独酌无相亲。",
|
||||
}, {
|
||||
e: GBK,
|
||||
encoded: "A\xa1\xa1\xb0\xa1\x81\x40\x81\x80\xaa\x40\xaa\x80\xa8\x40\xa8\x80Z\x80",
|
||||
utf8: "A\u3000\u554a\u4e02\u4e90\u72dc\u7349\u02ca\u2588Z€",
|
||||
}, {
|
||||
e: GBK,
|
||||
encoded: "\xbb\xa8\xbc\xe4\xd2\xbb\xba\xf8\xbe\xc6\xa3\xac\xb6\xc0\xd7\xc3" +
|
||||
"\xce\xde\xcf\xe0\xc7\xd7\xa1\xa3",
|
||||
utf8: "花间一壶酒,独酌无相亲。",
|
||||
}, {
|
||||
e: HZGB2312,
|
||||
encoded: "A~{\x21\x21~~\x30\x21~}Z~~",
|
||||
utf8: "A\u3000~\u554aZ~",
|
||||
}, {
|
||||
e: HZGB2312,
|
||||
encPrefix: "~{",
|
||||
encoded: ";(<dR;:x>F#,6@WCN^O`GW!#",
|
||||
utf8: "花间一壶酒,独酌无相亲。",
|
||||
}}
|
||||
|
||||
for _, tc := range testCases {
|
||||
enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, "")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFiles(t *testing.T) {
|
||||
enctest.TestFile(t, GB18030)
|
||||
enctest.TestFile(t, GBK)
|
||||
enctest.TestFile(t, HZGB2312)
|
||||
}
|
||||
|
||||
func BenchmarkEncoding(b *testing.B) {
|
||||
enctest.Benchmark(b, GB18030)
|
||||
enctest.Benchmark(b, GBK)
|
||||
enctest.Benchmark(b, HZGB2312)
|
||||
}
|
||||
114
vendor/golang.org/x/text/encoding/traditionalchinese/all_test.go
generated
vendored
114
vendor/golang.org/x/text/encoding/traditionalchinese/all_test.go
generated
vendored
|
|
@ -1,114 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package traditionalchinese
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/enctest"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func dec(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Decode", e.NewDecoder(), nil
|
||||
}
|
||||
func enc(e encoding.Encoding) (dir string, t transform.Transformer, err error) {
|
||||
return "Encode", e.NewEncoder(), internal.ErrASCIIReplacement
|
||||
}
|
||||
|
||||
func TestNonRepertoire(t *testing.T) {
|
||||
testCases := []struct {
|
||||
init func(e encoding.Encoding) (string, transform.Transformer, error)
|
||||
e encoding.Encoding
|
||||
src, want string
|
||||
}{
|
||||
{dec, Big5, "\x80", "\ufffd"},
|
||||
{dec, Big5, "\x81", "\ufffd"},
|
||||
{dec, Big5, "\x81\x30", "\ufffd\x30"},
|
||||
{dec, Big5, "\x81\x40", "\ufffd"},
|
||||
{dec, Big5, "\x81\xa0", "\ufffd"},
|
||||
{dec, Big5, "\xff", "\ufffd"},
|
||||
|
||||
{enc, Big5, "갂", ""},
|
||||
{enc, Big5, "a갂", "a"},
|
||||
{enc, Big5, "\u43f0갂", "\x87@"},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
dir, tr, wantErr := tc.init(tc.e)
|
||||
t.Run(fmt.Sprintf("%s/%v/%q", dir, tc.e, tc.src), func(t *testing.T) {
|
||||
dst := make([]byte, 100)
|
||||
src := []byte(tc.src)
|
||||
for i := 0; i <= len(tc.src); i++ {
|
||||
nDst, nSrc, err := tr.Transform(dst, src[:i], false)
|
||||
if err != nil && err != transform.ErrShortSrc && err != wantErr {
|
||||
t.Fatalf("error on first call to Transform: %v", err)
|
||||
}
|
||||
n, _, err := tr.Transform(dst[nDst:], src[nSrc:], true)
|
||||
nDst += n
|
||||
if err != wantErr {
|
||||
t.Fatalf("(%q|%q): got %v; want %v", tc.src[:i], tc.src[i:], err, wantErr)
|
||||
}
|
||||
if got := string(dst[:nDst]); got != tc.want {
|
||||
t.Errorf("(%q|%q):\ngot %q\nwant %q", tc.src[:i], tc.src[i:], got, tc.want)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBasics(t *testing.T) {
|
||||
// The encoded forms can be verified by the iconv program:
|
||||
// $ echo 月日は百代 | iconv -f UTF-8 -t SHIFT-JIS | xxd
|
||||
testCases := []struct {
|
||||
e encoding.Encoding
|
||||
encPrefix string
|
||||
encSuffix string
|
||||
encoded string
|
||||
utf8 string
|
||||
}{{
|
||||
e: Big5,
|
||||
encoded: "A\x87\x40\x87\x41\x87\x45\xa1\x40\xfe\xfd\xfe\xfeZ\xa3\xe1",
|
||||
utf8: "A\u43f0\u4c32\U00027267\u3000\U0002910d\u79d4Z€",
|
||||
}, {
|
||||
e: Big5,
|
||||
encoded: "\xaa\xe1\xb6\xa1\xa4\x40\xb3\xfd\xb0\x73\xa1\x41\xbf\x57\xb0\x75" +
|
||||
"\xb5\x4c\xac\xdb\xbf\xcb\xa1\x43",
|
||||
utf8: "花間一壺酒,獨酌無相親。",
|
||||
}}
|
||||
|
||||
for _, tc := range testCases {
|
||||
enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, "", "")
|
||||
}
|
||||
}
|
||||
|
||||
// TestFiles invokes enctest.TestFile for Big5.
func TestFiles(t *testing.T) {
	enctest.TestFile(t, Big5)
}
|
||||
|
||||
// BenchmarkEncoding invokes enctest.Benchmark for Big5.
func BenchmarkEncoding(b *testing.B) {
	enctest.Benchmark(b, Big5)
}
|
||||
|
||||
// TestBig5CircumflexAndMacron tests the special cases listed in
|
||||
// http://encoding.spec.whatwg.org/#big5
|
||||
// Note that these special cases aren't preserved by round-tripping through
|
||||
// decoding and encoding (since
|
||||
// http://encoding.spec.whatwg.org/index-big5.txt does not have an entry for
|
||||
// U+0304 or U+030C), so we can't test this in TestBasics.
|
||||
func TestBig5CircumflexAndMacron(t *testing.T) {
|
||||
src := "\x88\x5f\x88\x60\x88\x61\x88\x62\x88\x63\x88\x64\x88\x65\x88\x66 " +
|
||||
"\x88\xa2\x88\xa3\x88\xa4\x88\xa5\x88\xa6"
|
||||
want := "ÓǑÒ\u00ca\u0304Ế\u00ca\u030cỀÊ " +
|
||||
"ü\u00ea\u0304ế\u00ea\u030cề"
|
||||
dst, err := ioutil.ReadAll(transform.NewReader(
|
||||
strings.NewReader(src), Big5.NewDecoder()))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got := string(dst); got != want {
|
||||
t.Fatalf("\ngot %q\nwant %q", got, want)
|
||||
}
|
||||
}
|
||||
499
vendor/golang.org/x/text/encoding/unicode/unicode_test.go
generated
vendored
499
vendor/golang.org/x/text/encoding/unicode/unicode_test.go
generated
vendored
|
|
@ -1,499 +0,0 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package unicode
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/internal/enctest"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func TestBasics(t *testing.T) {
|
||||
testCases := []struct {
|
||||
e encoding.Encoding
|
||||
encPrefix string
|
||||
encSuffix string
|
||||
encoded string
|
||||
utf8 string
|
||||
}{{
|
||||
e: utf16BEIB,
|
||||
encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65",
|
||||
utf8: "\x57\u00e4\U0001d565",
|
||||
}, {
|
||||
e: utf16BEEB,
|
||||
encPrefix: "\xfe\xff",
|
||||
encoded: "\x00\x57\x00\xe4\xd8\x35\xdd\x65",
|
||||
utf8: "\x57\u00e4\U0001d565",
|
||||
}, {
|
||||
e: utf16LEIB,
|
||||
encoded: "\x57\x00\xe4\x00\x35\xd8\x65\xdd",
|
||||
utf8: "\x57\u00e4\U0001d565",
|
||||
}, {
|
||||
e: utf16LEEB,
|
||||
encPrefix: "\xff\xfe",
|
||||
encoded: "\x57\x00\xe4\x00\x35\xd8\x65\xdd",
|
||||
utf8: "\x57\u00e4\U0001d565",
|
||||
}}
|
||||
|
||||
for _, tc := range testCases {
|
||||
enctest.TestEncoding(t, tc.e, tc.encoded, tc.utf8, tc.encPrefix, tc.encSuffix)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFiles(t *testing.T) {
|
||||
enctest.TestFile(t, UTF8)
|
||||
enctest.TestFile(t, utf16LEIB)
|
||||
}
|
||||
|
||||
func BenchmarkEncoding(b *testing.B) {
|
||||
enctest.Benchmark(b, UTF8)
|
||||
enctest.Benchmark(b, utf16LEIB)
|
||||
}
|
||||
|
||||
// UTF-16 encodings shared by the tests in this file, one for each
// combination of default endianness (LE/BE) and BOM policy
// (IB = IgnoreBOM, UB = UseBOM, EB = ExpectBOM).
var (
|
||||
utf16LEIB = UTF16(LittleEndian, IgnoreBOM) // UTF-16LE (atypical interpretation)
|
||||
utf16LEUB = UTF16(LittleEndian, UseBOM) // UTF-16, LE
|
||||
utf16LEEB = UTF16(LittleEndian, ExpectBOM) // UTF-16, LE, Expect
|
||||
utf16BEIB = UTF16(BigEndian, IgnoreBOM) // UTF-16BE (atypical interpretation)
|
||||
utf16BEUB = UTF16(BigEndian, UseBOM) // UTF-16 default
|
||||
utf16BEEB = UTF16(BigEndian, ExpectBOM) // UTF-16 Expect
|
||||
)
|
||||
|
||||
func TestUTF16(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
src string
|
||||
notEOF bool // the inverse of atEOF
|
||||
sizeDst int
|
||||
want string
|
||||
nSrc int
|
||||
err error
|
||||
t transform.Transformer
|
||||
}{{
|
||||
desc: "utf-16 IgnoreBOM dec: empty string",
|
||||
t: utf16BEIB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16 UseBOM dec: empty string",
|
||||
t: utf16BEUB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16 ExpectBOM dec: empty string",
|
||||
err: ErrMissingBOM,
|
||||
t: utf16BEEB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16 dec: BOM determines encoding BE (RFC 2781:3.3)",
|
||||
src: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
|
||||
sizeDst: 100,
|
||||
want: "\U00012345=Ra",
|
||||
nSrc: 12,
|
||||
t: utf16BEUB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16 dec: BOM determines encoding LE (RFC 2781:3.3)",
|
||||
src: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
|
||||
sizeDst: 100,
|
||||
want: "\U00012345=Ra",
|
||||
nSrc: 12,
|
||||
t: utf16LEUB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16 dec: BOM determines encoding LE, change default (RFC 2781:3.3)",
|
||||
src: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
|
||||
sizeDst: 100,
|
||||
want: "\U00012345=Ra",
|
||||
nSrc: 12,
|
||||
t: utf16BEUB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16 dec: Fail on missing BOM when required",
|
||||
src: "\x08\xD8\x45\xDF\x3D\x00\xFF\xFE\xFE\xFF\x00\x52\x00\x61",
|
||||
sizeDst: 100,
|
||||
want: "",
|
||||
nSrc: 0,
|
||||
err: ErrMissingBOM,
|
||||
t: utf16BEEB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16 dec: SHOULD interpret text as big-endian when BOM not present (RFC 2781:4.3)",
|
||||
src: "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
|
||||
sizeDst: 100,
|
||||
want: "\U00012345=Ra",
|
||||
nSrc: 10,
|
||||
t: utf16BEUB.NewDecoder(),
|
||||
}, {
|
||||
// This is an error according to RFC 2781. But errors in RFC 2781 are
|
||||
// open to interpretations, so I guess this is fine.
|
||||
desc: "utf-16le dec: incorrect BOM is an error (RFC 2781:4.1)",
|
||||
src: "\xFE\xFF\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
|
||||
sizeDst: 100,
|
||||
want: "\uFFFE\U00012345=Ra",
|
||||
nSrc: 12,
|
||||
t: utf16LEIB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16 enc: SHOULD write BOM (RFC 2781:3.3)",
|
||||
src: "\U00012345=Ra",
|
||||
sizeDst: 100,
|
||||
want: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
|
||||
nSrc: 7,
|
||||
t: utf16LEUB.NewEncoder(),
|
||||
}, {
|
||||
desc: "utf-16 enc: SHOULD write BOM (RFC 2781:3.3)",
|
||||
src: "\U00012345=Ra",
|
||||
sizeDst: 100,
|
||||
want: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
|
||||
nSrc: 7,
|
||||
t: utf16BEUB.NewEncoder(),
|
||||
}, {
|
||||
desc: "utf-16le enc: MUST NOT write BOM (RFC 2781:3.3)",
|
||||
src: "\U00012345=Ra",
|
||||
sizeDst: 100,
|
||||
want: "\x08\xD8\x45\xDF\x3D\x00\x52\x00\x61\x00",
|
||||
nSrc: 7,
|
||||
t: utf16LEIB.NewEncoder(),
|
||||
}, {
|
||||
desc: "utf-16be dec: incorrect UTF-16: odd bytes",
|
||||
src: "\x00",
|
||||
sizeDst: 100,
|
||||
want: "\uFFFD",
|
||||
nSrc: 1,
|
||||
t: utf16BEIB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16be dec: unpaired surrogate, odd bytes",
|
||||
src: "\xD8\x45\x00",
|
||||
sizeDst: 100,
|
||||
want: "\uFFFD\uFFFD",
|
||||
nSrc: 3,
|
||||
t: utf16BEIB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16be dec: unpaired low surrogate + valid text",
|
||||
src: "\xD8\x45\x00a",
|
||||
sizeDst: 100,
|
||||
want: "\uFFFDa",
|
||||
nSrc: 4,
|
||||
t: utf16BEIB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16be dec: unpaired low surrogate + valid text + single byte",
|
||||
src: "\xD8\x45\x00ab",
|
||||
sizeDst: 100,
|
||||
want: "\uFFFDa\uFFFD",
|
||||
nSrc: 5,
|
||||
t: utf16BEIB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16le dec: unpaired high surrogate",
|
||||
src: "\x00\x00\x00\xDC\x12\xD8",
|
||||
sizeDst: 100,
|
||||
want: "\x00\uFFFD\uFFFD",
|
||||
nSrc: 6,
|
||||
t: utf16LEIB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16be dec: two unpaired low surrogates",
|
||||
src: "\xD8\x45\xD8\x12",
|
||||
sizeDst: 100,
|
||||
want: "\uFFFD\uFFFD",
|
||||
nSrc: 4,
|
||||
t: utf16BEIB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16be dec: short dst",
|
||||
src: "\x00a",
|
||||
sizeDst: 0,
|
||||
want: "",
|
||||
nSrc: 0,
|
||||
t: utf16BEIB.NewDecoder(),
|
||||
err: transform.ErrShortDst,
|
||||
}, {
|
||||
desc: "utf-16be dec: short dst surrogate",
|
||||
src: "\xD8\xF5\xDC\x12",
|
||||
sizeDst: 3,
|
||||
want: "",
|
||||
nSrc: 0,
|
||||
t: utf16BEIB.NewDecoder(),
|
||||
err: transform.ErrShortDst,
|
||||
}, {
|
||||
desc: "utf-16be dec: short dst trailing byte",
|
||||
src: "\x00",
|
||||
sizeDst: 2,
|
||||
want: "",
|
||||
nSrc: 0,
|
||||
t: utf16BEIB.NewDecoder(),
|
||||
err: transform.ErrShortDst,
|
||||
}, {
|
||||
desc: "utf-16be dec: short src",
|
||||
src: "\x00",
|
||||
notEOF: true,
|
||||
sizeDst: 3,
|
||||
want: "",
|
||||
nSrc: 0,
|
||||
t: utf16BEIB.NewDecoder(),
|
||||
err: transform.ErrShortSrc,
|
||||
}, {
|
||||
desc: "utf-16 enc",
|
||||
src: "\U00012345=Ra",
|
||||
sizeDst: 100,
|
||||
want: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
|
||||
nSrc: 7,
|
||||
t: utf16BEUB.NewEncoder(),
|
||||
}, {
|
||||
desc: "utf-16 enc: short dst normal",
|
||||
src: "\U00012345=Ra",
|
||||
sizeDst: 9,
|
||||
want: "\xD8\x08\xDF\x45\x00\x3D\x00\x52",
|
||||
nSrc: 6,
|
||||
t: utf16BEIB.NewEncoder(),
|
||||
err: transform.ErrShortDst,
|
||||
}, {
|
||||
desc: "utf-16 enc: short dst surrogate",
|
||||
src: "\U00012345=Ra",
|
||||
sizeDst: 3,
|
||||
want: "",
|
||||
nSrc: 0,
|
||||
t: utf16BEIB.NewEncoder(),
|
||||
err: transform.ErrShortDst,
|
||||
}, {
|
||||
desc: "utf-16 enc: short src",
|
||||
src: "\U00012345=Ra\xC2",
|
||||
notEOF: true,
|
||||
sizeDst: 100,
|
||||
want: "\xD8\x08\xDF\x45\x00\x3D\x00\x52\x00\x61",
|
||||
nSrc: 7,
|
||||
t: utf16BEIB.NewEncoder(),
|
||||
err: transform.ErrShortSrc,
|
||||
}, {
|
||||
desc: "utf-16be dec: don't change byte order mid-stream",
|
||||
src: "\xFE\xFF\xD8\x08\xDF\x45\x00\x3D\xFF\xFE\x00\x52\x00\x61",
|
||||
sizeDst: 100,
|
||||
want: "\U00012345=\ufffeRa",
|
||||
nSrc: 14,
|
||||
t: utf16BEUB.NewDecoder(),
|
||||
}, {
|
||||
desc: "utf-16le dec: don't change byte order mid-stream",
|
||||
src: "\xFF\xFE\x08\xD8\x45\xDF\x3D\x00\xFF\xFE\xFE\xFF\x52\x00\x61\x00",
|
||||
sizeDst: 100,
|
||||
want: "\U00012345=\ufeff\ufffeRa",
|
||||
nSrc: 16,
|
||||
t: utf16LEUB.NewDecoder(),
|
||||
}}
|
||||
for i, tc := range testCases {
|
||||
b := make([]byte, tc.sizeDst)
|
||||
nDst, nSrc, err := tc.t.Transform(b, []byte(tc.src), !tc.notEOF)
|
||||
if err != tc.err {
|
||||
t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
|
||||
}
|
||||
if got := string(b[:nDst]); got != tc.want {
|
||||
t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
|
||||
}
|
||||
if nSrc != tc.nSrc {
|
||||
t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestUTF8Decoder(t *testing.T) {
|
||||
testCases := []struct {
|
||||
desc string
|
||||
src string
|
||||
notEOF bool // the inverse of atEOF
|
||||
sizeDst int
|
||||
want string
|
||||
nSrc int
|
||||
err error
|
||||
}{{
|
||||
desc: "empty string, empty dest buffer",
|
||||
}, {
|
||||
desc: "empty string",
|
||||
sizeDst: 8,
|
||||
}, {
|
||||
desc: "empty string, streaming",
|
||||
notEOF: true,
|
||||
sizeDst: 8,
|
||||
}, {
|
||||
desc: "ascii",
|
||||
src: "abcde",
|
||||
sizeDst: 8,
|
||||
want: "abcde",
|
||||
nSrc: 5,
|
||||
}, {
|
||||
desc: "ascii and error",
|
||||
src: "ab\x80de",
|
||||
sizeDst: 7,
|
||||
want: "ab\ufffdde",
|
||||
nSrc: 5,
|
||||
}, {
|
||||
desc: "valid two-byte sequence",
|
||||
src: "a\u0300bc",
|
||||
sizeDst: 7,
|
||||
want: "a\u0300bc",
|
||||
nSrc: 5,
|
||||
}, {
|
||||
desc: "valid three-byte sequence",
|
||||
src: "a\u0300中",
|
||||
sizeDst: 7,
|
||||
want: "a\u0300中",
|
||||
nSrc: 6,
|
||||
}, {
|
||||
desc: "valid four-byte sequence",
|
||||
src: "a中\U00016F50",
|
||||
sizeDst: 8,
|
||||
want: "a中\U00016F50",
|
||||
nSrc: 8,
|
||||
}, {
|
||||
desc: "short source buffer",
|
||||
src: "abc\xf0\x90",
|
||||
notEOF: true,
|
||||
sizeDst: 10,
|
||||
want: "abc",
|
||||
nSrc: 3,
|
||||
err: transform.ErrShortSrc,
|
||||
}, {
|
||||
// We don't check for the maximal subpart of an ill-formed subsequence
|
||||
// at the end of an open segment.
|
||||
desc: "complete invalid that looks like short at end",
|
||||
src: "abc\xf0\x80",
|
||||
notEOF: true,
|
||||
sizeDst: 10,
|
||||
want: "abc", // instead of "abc\ufffd\ufffd",
|
||||
nSrc: 3,
|
||||
err: transform.ErrShortSrc,
|
||||
}, {
|
||||
desc: "incomplete sequence at end",
|
||||
src: "a\x80bc\xf0\x90",
|
||||
sizeDst: 9,
|
||||
want: "a\ufffdbc\ufffd",
|
||||
nSrc: 6,
|
||||
}, {
|
||||
desc: "invalid second byte",
|
||||
src: "abc\xf0dddd",
|
||||
sizeDst: 10,
|
||||
want: "abc\ufffddddd",
|
||||
nSrc: 8,
|
||||
}, {
|
||||
desc: "invalid second byte at end",
|
||||
src: "abc\xf0d",
|
||||
sizeDst: 10,
|
||||
want: "abc\ufffdd",
|
||||
nSrc: 5,
|
||||
}, {
|
||||
desc: "invalid third byte",
|
||||
src: "a\u0300bc\xf0\x90dddd",
|
||||
sizeDst: 12,
|
||||
want: "a\u0300bc\ufffddddd",
|
||||
nSrc: 11,
|
||||
}, {
|
||||
desc: "invalid third byte at end",
|
||||
src: "a\u0300bc\xf0\x90d",
|
||||
sizeDst: 12,
|
||||
want: "a\u0300bc\ufffdd",
|
||||
nSrc: 8,
|
||||
}, {
|
||||
desc: "invalid fourth byte, tight buffer",
|
||||
src: "a\u0300bc\xf0\x90\x80d",
|
||||
sizeDst: 9,
|
||||
want: "a\u0300bc\ufffdd",
|
||||
nSrc: 9,
|
||||
}, {
|
||||
desc: "invalid fourth byte at end",
|
||||
src: "a\u0300bc\xf0\x90\x80",
|
||||
sizeDst: 8,
|
||||
want: "a\u0300bc\ufffd",
|
||||
nSrc: 8,
|
||||
}, {
|
||||
desc: "invalid fourth byte and short four byte sequence",
|
||||
src: "a\u0300bc\xf0\x90\x80\xf0\x90\x80",
|
||||
notEOF: true,
|
||||
sizeDst: 20,
|
||||
want: "a\u0300bc\ufffd",
|
||||
nSrc: 8,
|
||||
err: transform.ErrShortSrc,
|
||||
}, {
|
||||
desc: "valid four-byte sequence overflowing short buffer",
|
||||
src: "a\u0300bc\xf0\x90\x80\x80",
|
||||
notEOF: true,
|
||||
sizeDst: 8,
|
||||
want: "a\u0300bc",
|
||||
nSrc: 5,
|
||||
err: transform.ErrShortDst,
|
||||
}, {
|
||||
desc: "invalid fourth byte at end short, but short dst",
|
||||
src: "a\u0300bc\xf0\x90\x80\xf0\x90\x80",
|
||||
notEOF: true,
|
||||
sizeDst: 8,
|
||||
// More bytes would fit in the buffer, but this seems to require a more
|
||||
// complicated and slower algorithm.
|
||||
want: "a\u0300bc", // instead of "a\u0300bc"
|
||||
nSrc: 5,
|
||||
err: transform.ErrShortDst,
|
||||
}, {
|
||||
desc: "short dst for error",
|
||||
src: "abc\x80",
|
||||
notEOF: true,
|
||||
sizeDst: 5,
|
||||
want: "abc",
|
||||
nSrc: 3,
|
||||
err: transform.ErrShortDst,
|
||||
}, {
|
||||
desc: "adjusting short dst buffer",
|
||||
src: "abc\x80ef",
|
||||
notEOF: true,
|
||||
sizeDst: 6,
|
||||
want: "abc\ufffd",
|
||||
nSrc: 4,
|
||||
err: transform.ErrShortDst,
|
||||
}}
|
||||
tr := UTF8.NewDecoder()
|
||||
for i, tc := range testCases {
|
||||
b := make([]byte, tc.sizeDst)
|
||||
nDst, nSrc, err := tr.Transform(b, []byte(tc.src), !tc.notEOF)
|
||||
if err != tc.err {
|
||||
t.Errorf("%d:%s: error was %v; want %v", i, tc.desc, err, tc.err)
|
||||
}
|
||||
if got := string(b[:nDst]); got != tc.want {
|
||||
t.Errorf("%d:%s: result was %q: want %q", i, tc.desc, got, tc.want)
|
||||
}
|
||||
if nSrc != tc.nSrc {
|
||||
t.Errorf("%d:%s: nSrc was %d; want %d", i, tc.desc, nSrc, tc.nSrc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBOMOverride(t *testing.T) {
|
||||
dec := BOMOverride(charmap.CodePage437.NewDecoder())
|
||||
dst := make([]byte, 100)
|
||||
for i, tc := range []struct {
|
||||
src string
|
||||
atEOF bool
|
||||
dst string
|
||||
nSrc int
|
||||
err error
|
||||
}{
|
||||
0: {"H\x82ll\x93", true, "Héllô", 5, nil},
|
||||
1: {"\uFEFFHéllö", true, "Héllö", 10, nil},
|
||||
2: {"\xFE\xFF\x00H\x00e\x00l\x00l\x00o", true, "Hello", 12, nil},
|
||||
3: {"\xFF\xFEH\x00e\x00l\x00l\x00o\x00", true, "Hello", 12, nil},
|
||||
4: {"\uFEFF", true, "", 3, nil},
|
||||
5: {"\xFE\xFF", true, "", 2, nil},
|
||||
6: {"\xFF\xFE", true, "", 2, nil},
|
||||
7: {"\xEF\xBB", true, "\u2229\u2557", 2, nil},
|
||||
8: {"\xEF", true, "\u2229", 1, nil},
|
||||
9: {"", true, "", 0, nil},
|
||||
10: {"\xFE", true, "\u25a0", 1, nil},
|
||||
11: {"\xFF", true, "\u00a0", 1, nil},
|
||||
12: {"\xEF\xBB", false, "", 0, transform.ErrShortSrc},
|
||||
13: {"\xEF", false, "", 0, transform.ErrShortSrc},
|
||||
14: {"", false, "", 0, transform.ErrShortSrc},
|
||||
15: {"\xFE", false, "", 0, transform.ErrShortSrc},
|
||||
16: {"\xFF", false, "", 0, transform.ErrShortSrc},
|
||||
17: {"\xFF\xFE", false, "", 0, transform.ErrShortSrc},
|
||||
} {
|
||||
dec.Reset()
|
||||
nDst, nSrc, err := dec.Transform(dst, []byte(tc.src), tc.atEOF)
|
||||
got := string(dst[:nDst])
|
||||
if nSrc != tc.nSrc {
|
||||
t.Errorf("%d: nSrc: got %d; want %d", i, nSrc, tc.nSrc)
|
||||
}
|
||||
if got != tc.dst {
|
||||
t.Errorf("%d: got %+q; want %+q", i, got, tc.dst)
|
||||
}
|
||||
if err != tc.err {
|
||||
t.Errorf("%d: error: got %v; want %v", i, err, tc.err)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue