Update go dependencies
This commit is contained in:
parent
15ffb51394
commit
bb4d483837
1621 changed files with 86368 additions and 284392 deletions
109
vendor/golang.org/x/net/html/atom/atom_test.go
generated
vendored
109
vendor/golang.org/x/net/html/atom/atom_test.go
generated
vendored
|
|
@ -1,109 +0,0 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package atom
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestKnown(t *testing.T) {
|
||||
for _, s := range testAtomList {
|
||||
if atom := Lookup([]byte(s)); atom.String() != s {
|
||||
t.Errorf("Lookup(%q) = %#x (%q)", s, uint32(atom), atom.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHits(t *testing.T) {
|
||||
for _, a := range table {
|
||||
if a == 0 {
|
||||
continue
|
||||
}
|
||||
got := Lookup([]byte(a.String()))
|
||||
if got != a {
|
||||
t.Errorf("Lookup(%q) = %#x, want %#x", a.String(), uint32(got), uint32(a))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMisses(t *testing.T) {
|
||||
testCases := []string{
|
||||
"",
|
||||
"\x00",
|
||||
"\xff",
|
||||
"A",
|
||||
"DIV",
|
||||
"Div",
|
||||
"dIV",
|
||||
"aa",
|
||||
"a\x00",
|
||||
"ab",
|
||||
"abb",
|
||||
"abbr0",
|
||||
"abbr ",
|
||||
" abbr",
|
||||
" a",
|
||||
"acceptcharset",
|
||||
"acceptCharset",
|
||||
"accept_charset",
|
||||
"h0",
|
||||
"h1h2",
|
||||
"h7",
|
||||
"onClick",
|
||||
"λ",
|
||||
// The following string has the same hash (0xa1d7fab7) as "onmouseover".
|
||||
"\x00\x00\x00\x00\x00\x50\x18\xae\x38\xd0\xb7",
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
got := Lookup([]byte(tc))
|
||||
if got != 0 {
|
||||
t.Errorf("Lookup(%q): got %d, want 0", tc, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestForeignObject(t *testing.T) {
|
||||
const (
|
||||
afo = Foreignobject
|
||||
afO = ForeignObject
|
||||
sfo = "foreignobject"
|
||||
sfO = "foreignObject"
|
||||
)
|
||||
if got := Lookup([]byte(sfo)); got != afo {
|
||||
t.Errorf("Lookup(%q): got %#v, want %#v", sfo, got, afo)
|
||||
}
|
||||
if got := Lookup([]byte(sfO)); got != afO {
|
||||
t.Errorf("Lookup(%q): got %#v, want %#v", sfO, got, afO)
|
||||
}
|
||||
if got := afo.String(); got != sfo {
|
||||
t.Errorf("Atom(%#v).String(): got %q, want %q", afo, got, sfo)
|
||||
}
|
||||
if got := afO.String(); got != sfO {
|
||||
t.Errorf("Atom(%#v).String(): got %q, want %q", afO, got, sfO)
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkLookup(b *testing.B) {
|
||||
sortedTable := make([]string, 0, len(table))
|
||||
for _, a := range table {
|
||||
if a != 0 {
|
||||
sortedTable = append(sortedTable, a.String())
|
||||
}
|
||||
}
|
||||
sort.Strings(sortedTable)
|
||||
|
||||
x := make([][]byte, 1000)
|
||||
for i := range x {
|
||||
x[i] = []byte(sortedTable[i%len(sortedTable)])
|
||||
}
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
for _, s := range x {
|
||||
Lookup(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
5
vendor/golang.org/x/net/html/atom/gen.go
generated
vendored
5
vendor/golang.org/x/net/html/atom/gen.go
generated
vendored
|
|
@ -306,7 +306,7 @@ func (t *table) push(i uint32, depth int) bool {
|
|||
|
||||
// The lists of element names and attribute keys were taken from
|
||||
// https://html.spec.whatwg.org/multipage/indices.html#index
|
||||
// as of the "HTML Living Standard - Last Updated 18 September 2017" version.
|
||||
// as of the "HTML Living Standard - Last Updated 16 April 2018" version.
|
||||
|
||||
// "command", "keygen" and "menuitem" have been removed from the spec,
|
||||
// but are kept here for backwards compatibility.
|
||||
|
|
@ -665,6 +665,7 @@ var eventHandlers = []string{
|
|||
|
||||
// extra are ad-hoc values not covered by any of the lists above.
|
||||
var extra = []string{
|
||||
"acronym",
|
||||
"align",
|
||||
"annotation",
|
||||
"annotation-xml",
|
||||
|
|
@ -700,6 +701,8 @@ var extra = []string{
|
|||
"plaintext",
|
||||
"prompt",
|
||||
"public",
|
||||
"rb",
|
||||
"rtc",
|
||||
"spacer",
|
||||
"strike",
|
||||
"svg",
|
||||
|
|
|
|||
1444
vendor/golang.org/x/net/html/atom/table.go
generated
vendored
1444
vendor/golang.org/x/net/html/atom/table.go
generated
vendored
File diff suppressed because it is too large
Load diff
373
vendor/golang.org/x/net/html/atom/table_test.go
generated
vendored
373
vendor/golang.org/x/net/html/atom/table_test.go
generated
vendored
|
|
@ -1,373 +0,0 @@
|
|||
// Code generated by go generate gen.go; DO NOT EDIT.
|
||||
|
||||
//go:generate go run gen.go -test
|
||||
|
||||
package atom
|
||||
|
||||
var testAtomList = []string{
|
||||
"a",
|
||||
"abbr",
|
||||
"accept",
|
||||
"accept-charset",
|
||||
"accesskey",
|
||||
"action",
|
||||
"address",
|
||||
"align",
|
||||
"allowfullscreen",
|
||||
"allowpaymentrequest",
|
||||
"allowusermedia",
|
||||
"alt",
|
||||
"annotation",
|
||||
"annotation-xml",
|
||||
"applet",
|
||||
"area",
|
||||
"article",
|
||||
"as",
|
||||
"aside",
|
||||
"async",
|
||||
"audio",
|
||||
"autocomplete",
|
||||
"autofocus",
|
||||
"autoplay",
|
||||
"b",
|
||||
"base",
|
||||
"basefont",
|
||||
"bdi",
|
||||
"bdo",
|
||||
"bgsound",
|
||||
"big",
|
||||
"blink",
|
||||
"blockquote",
|
||||
"body",
|
||||
"br",
|
||||
"button",
|
||||
"canvas",
|
||||
"caption",
|
||||
"center",
|
||||
"challenge",
|
||||
"charset",
|
||||
"checked",
|
||||
"cite",
|
||||
"class",
|
||||
"code",
|
||||
"col",
|
||||
"colgroup",
|
||||
"color",
|
||||
"cols",
|
||||
"colspan",
|
||||
"command",
|
||||
"content",
|
||||
"contenteditable",
|
||||
"contextmenu",
|
||||
"controls",
|
||||
"coords",
|
||||
"crossorigin",
|
||||
"data",
|
||||
"datalist",
|
||||
"datetime",
|
||||
"dd",
|
||||
"default",
|
||||
"defer",
|
||||
"del",
|
||||
"desc",
|
||||
"details",
|
||||
"dfn",
|
||||
"dialog",
|
||||
"dir",
|
||||
"dirname",
|
||||
"disabled",
|
||||
"div",
|
||||
"dl",
|
||||
"download",
|
||||
"draggable",
|
||||
"dropzone",
|
||||
"dt",
|
||||
"em",
|
||||
"embed",
|
||||
"enctype",
|
||||
"face",
|
||||
"fieldset",
|
||||
"figcaption",
|
||||
"figure",
|
||||
"font",
|
||||
"footer",
|
||||
"for",
|
||||
"foreignObject",
|
||||
"foreignobject",
|
||||
"form",
|
||||
"formaction",
|
||||
"formenctype",
|
||||
"formmethod",
|
||||
"formnovalidate",
|
||||
"formtarget",
|
||||
"frame",
|
||||
"frameset",
|
||||
"h1",
|
||||
"h2",
|
||||
"h3",
|
||||
"h4",
|
||||
"h5",
|
||||
"h6",
|
||||
"head",
|
||||
"header",
|
||||
"headers",
|
||||
"height",
|
||||
"hgroup",
|
||||
"hidden",
|
||||
"high",
|
||||
"hr",
|
||||
"href",
|
||||
"hreflang",
|
||||
"html",
|
||||
"http-equiv",
|
||||
"i",
|
||||
"icon",
|
||||
"id",
|
||||
"iframe",
|
||||
"image",
|
||||
"img",
|
||||
"input",
|
||||
"inputmode",
|
||||
"ins",
|
||||
"integrity",
|
||||
"is",
|
||||
"isindex",
|
||||
"ismap",
|
||||
"itemid",
|
||||
"itemprop",
|
||||
"itemref",
|
||||
"itemscope",
|
||||
"itemtype",
|
||||
"kbd",
|
||||
"keygen",
|
||||
"keytype",
|
||||
"kind",
|
||||
"label",
|
||||
"lang",
|
||||
"legend",
|
||||
"li",
|
||||
"link",
|
||||
"list",
|
||||
"listing",
|
||||
"loop",
|
||||
"low",
|
||||
"main",
|
||||
"malignmark",
|
||||
"manifest",
|
||||
"map",
|
||||
"mark",
|
||||
"marquee",
|
||||
"math",
|
||||
"max",
|
||||
"maxlength",
|
||||
"media",
|
||||
"mediagroup",
|
||||
"menu",
|
||||
"menuitem",
|
||||
"meta",
|
||||
"meter",
|
||||
"method",
|
||||
"mglyph",
|
||||
"mi",
|
||||
"min",
|
||||
"minlength",
|
||||
"mn",
|
||||
"mo",
|
||||
"ms",
|
||||
"mtext",
|
||||
"multiple",
|
||||
"muted",
|
||||
"name",
|
||||
"nav",
|
||||
"nobr",
|
||||
"noembed",
|
||||
"noframes",
|
||||
"nomodule",
|
||||
"nonce",
|
||||
"noscript",
|
||||
"novalidate",
|
||||
"object",
|
||||
"ol",
|
||||
"onabort",
|
||||
"onafterprint",
|
||||
"onautocomplete",
|
||||
"onautocompleteerror",
|
||||
"onauxclick",
|
||||
"onbeforeprint",
|
||||
"onbeforeunload",
|
||||
"onblur",
|
||||
"oncancel",
|
||||
"oncanplay",
|
||||
"oncanplaythrough",
|
||||
"onchange",
|
||||
"onclick",
|
||||
"onclose",
|
||||
"oncontextmenu",
|
||||
"oncopy",
|
||||
"oncuechange",
|
||||
"oncut",
|
||||
"ondblclick",
|
||||
"ondrag",
|
||||
"ondragend",
|
||||
"ondragenter",
|
||||
"ondragexit",
|
||||
"ondragleave",
|
||||
"ondragover",
|
||||
"ondragstart",
|
||||
"ondrop",
|
||||
"ondurationchange",
|
||||
"onemptied",
|
||||
"onended",
|
||||
"onerror",
|
||||
"onfocus",
|
||||
"onhashchange",
|
||||
"oninput",
|
||||
"oninvalid",
|
||||
"onkeydown",
|
||||
"onkeypress",
|
||||
"onkeyup",
|
||||
"onlanguagechange",
|
||||
"onload",
|
||||
"onloadeddata",
|
||||
"onloadedmetadata",
|
||||
"onloadend",
|
||||
"onloadstart",
|
||||
"onmessage",
|
||||
"onmessageerror",
|
||||
"onmousedown",
|
||||
"onmouseenter",
|
||||
"onmouseleave",
|
||||
"onmousemove",
|
||||
"onmouseout",
|
||||
"onmouseover",
|
||||
"onmouseup",
|
||||
"onmousewheel",
|
||||
"onoffline",
|
||||
"ononline",
|
||||
"onpagehide",
|
||||
"onpageshow",
|
||||
"onpaste",
|
||||
"onpause",
|
||||
"onplay",
|
||||
"onplaying",
|
||||
"onpopstate",
|
||||
"onprogress",
|
||||
"onratechange",
|
||||
"onrejectionhandled",
|
||||
"onreset",
|
||||
"onresize",
|
||||
"onscroll",
|
||||
"onsecuritypolicyviolation",
|
||||
"onseeked",
|
||||
"onseeking",
|
||||
"onselect",
|
||||
"onshow",
|
||||
"onsort",
|
||||
"onstalled",
|
||||
"onstorage",
|
||||
"onsubmit",
|
||||
"onsuspend",
|
||||
"ontimeupdate",
|
||||
"ontoggle",
|
||||
"onunhandledrejection",
|
||||
"onunload",
|
||||
"onvolumechange",
|
||||
"onwaiting",
|
||||
"onwheel",
|
||||
"open",
|
||||
"optgroup",
|
||||
"optimum",
|
||||
"option",
|
||||
"output",
|
||||
"p",
|
||||
"param",
|
||||
"pattern",
|
||||
"picture",
|
||||
"ping",
|
||||
"placeholder",
|
||||
"plaintext",
|
||||
"playsinline",
|
||||
"poster",
|
||||
"pre",
|
||||
"preload",
|
||||
"progress",
|
||||
"prompt",
|
||||
"public",
|
||||
"q",
|
||||
"radiogroup",
|
||||
"readonly",
|
||||
"referrerpolicy",
|
||||
"rel",
|
||||
"required",
|
||||
"reversed",
|
||||
"rows",
|
||||
"rowspan",
|
||||
"rp",
|
||||
"rt",
|
||||
"ruby",
|
||||
"s",
|
||||
"samp",
|
||||
"sandbox",
|
||||
"scope",
|
||||
"scoped",
|
||||
"script",
|
||||
"seamless",
|
||||
"section",
|
||||
"select",
|
||||
"selected",
|
||||
"shape",
|
||||
"size",
|
||||
"sizes",
|
||||
"slot",
|
||||
"small",
|
||||
"sortable",
|
||||
"sorted",
|
||||
"source",
|
||||
"spacer",
|
||||
"span",
|
||||
"spellcheck",
|
||||
"src",
|
||||
"srcdoc",
|
||||
"srclang",
|
||||
"srcset",
|
||||
"start",
|
||||
"step",
|
||||
"strike",
|
||||
"strong",
|
||||
"style",
|
||||
"sub",
|
||||
"summary",
|
||||
"sup",
|
||||
"svg",
|
||||
"system",
|
||||
"tabindex",
|
||||
"table",
|
||||
"target",
|
||||
"tbody",
|
||||
"td",
|
||||
"template",
|
||||
"textarea",
|
||||
"tfoot",
|
||||
"th",
|
||||
"thead",
|
||||
"time",
|
||||
"title",
|
||||
"tr",
|
||||
"track",
|
||||
"translate",
|
||||
"tt",
|
||||
"type",
|
||||
"typemustmatch",
|
||||
"u",
|
||||
"ul",
|
||||
"updateviacache",
|
||||
"usemap",
|
||||
"value",
|
||||
"var",
|
||||
"video",
|
||||
"wbr",
|
||||
"width",
|
||||
"workertype",
|
||||
"wrap",
|
||||
"xmp",
|
||||
}
|
||||
237
vendor/golang.org/x/net/html/charset/charset_test.go
generated
vendored
237
vendor/golang.org/x/net/html/charset/charset_test.go
generated
vendored
|
|
@ -1,237 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package charset
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"io/ioutil"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
func transformString(t transform.Transformer, s string) (string, error) {
|
||||
r := transform.NewReader(strings.NewReader(s), t)
|
||||
b, err := ioutil.ReadAll(r)
|
||||
return string(b), err
|
||||
}
|
||||
|
||||
type testCase struct {
|
||||
utf8, other, otherEncoding string
|
||||
}
|
||||
|
||||
// testCases for encoding and decoding.
|
||||
var testCases = []testCase{
|
||||
{"Résumé", "Résumé", "utf8"},
|
||||
{"Résumé", "R\xe9sum\xe9", "latin1"},
|
||||
{"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"},
|
||||
{"これは漢字です。", "0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16BE"},
|
||||
{"Hello, world", "Hello, world", "ASCII"},
|
||||
{"Gdańsk", "Gda\xf1sk", "ISO-8859-2"},
|
||||
{"Ââ Čč Đđ Ŋŋ Õõ Šš Žž Åå Ää", "\xc2\xe2 \xc8\xe8 \xa9\xb9 \xaf\xbf \xd5\xf5 \xaa\xba \xac\xbc \xc5\xe5 \xc4\xe4", "ISO-8859-10"},
|
||||
{"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "ISO-8859-11"},
|
||||
{"latviešu", "latvie\xf0u", "ISO-8859-13"},
|
||||
{"Seònaid", "Se\xf2naid", "ISO-8859-14"},
|
||||
{"€1 is cheap", "\xa41 is cheap", "ISO-8859-15"},
|
||||
{"românește", "rom\xe2ne\xbate", "ISO-8859-16"},
|
||||
{"nutraĵo", "nutra\xbco", "ISO-8859-3"},
|
||||
{"Kalâdlit", "Kal\xe2dlit", "ISO-8859-4"},
|
||||
{"русский", "\xe0\xe3\xe1\xe1\xda\xd8\xd9", "ISO-8859-5"},
|
||||
{"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "ISO-8859-7"},
|
||||
{"Kağan", "Ka\xf0an", "ISO-8859-9"},
|
||||
{"Résumé", "R\x8esum\x8e", "macintosh"},
|
||||
{"Gdańsk", "Gda\xf1sk", "windows-1250"},
|
||||
{"русский", "\xf0\xf3\xf1\xf1\xea\xe8\xe9", "windows-1251"},
|
||||
{"Résumé", "R\xe9sum\xe9", "windows-1252"},
|
||||
{"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "windows-1253"},
|
||||
{"Kağan", "Ka\xf0an", "windows-1254"},
|
||||
{"עִבְרִית", "\xf2\xc4\xe1\xc0\xf8\xc4\xe9\xfa", "windows-1255"},
|
||||
{"العربية", "\xc7\xe1\xda\xd1\xc8\xed\xc9", "windows-1256"},
|
||||
{"latviešu", "latvie\xf0u", "windows-1257"},
|
||||
{"Việt", "Vi\xea\xf2t", "windows-1258"},
|
||||
{"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "windows-874"},
|
||||
{"русский", "\xd2\xd5\xd3\xd3\xcb\xc9\xca", "KOI8-R"},
|
||||
{"українська", "\xd5\xcb\xd2\xc1\xa7\xce\xd3\xd8\xcb\xc1", "KOI8-U"},
|
||||
{"Hello 常用國字標準字體表", "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed", "big5"},
|
||||
{"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gbk"},
|
||||
{"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gb18030"},
|
||||
{"עִבְרִית", "\x81\x30\xfb\x30\x81\x30\xf6\x34\x81\x30\xf9\x33\x81\x30\xf6\x30\x81\x30\xfb\x36\x81\x30\xf6\x34\x81\x30\xfa\x31\x81\x30\xfb\x38", "gb18030"},
|
||||
{"㧯", "\x82\x31\x89\x38", "gb18030"},
|
||||
{"これは漢字です。", "\x82\xb1\x82\xea\x82\xcd\x8a\xbf\x8e\x9a\x82\xc5\x82\xb7\x81B", "SJIS"},
|
||||
{"Hello, 世界!", "Hello, \x90\xa2\x8aE!", "SJIS"},
|
||||
{"イウエオカ", "\xb2\xb3\xb4\xb5\xb6", "SJIS"},
|
||||
{"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "EUC-JP"},
|
||||
{"Hello, 世界!", "Hello, \x1b$B@$3&\x1b(B!", "ISO-2022-JP"},
|
||||
{"다음과 같은 조건을 따라야 합니다: 저작자표시", "\xb4\xd9\xc0\xbd\xb0\xfa \xb0\xb0\xc0\xba \xc1\xb6\xb0\xc7\xc0\xbb \xb5\xfb\xb6\xf3\xbe\xdf \xc7մϴ\xd9: \xc0\xfa\xc0\xdb\xc0\xdaǥ\xbd\xc3", "EUC-KR"},
|
||||
}
|
||||
|
||||
func TestDecode(t *testing.T) {
|
||||
testCases := append(testCases, []testCase{
|
||||
// Replace multi-byte maximum subpart of ill-formed subsequence with
|
||||
// single replacement character (WhatWG requirement).
|
||||
{"Rés\ufffdumé", "Rés\xe1\x80umé", "utf8"},
|
||||
}...)
|
||||
for _, tc := range testCases {
|
||||
e, _ := Lookup(tc.otherEncoding)
|
||||
if e == nil {
|
||||
t.Errorf("%s: not found", tc.otherEncoding)
|
||||
continue
|
||||
}
|
||||
s, err := transformString(e.NewDecoder(), tc.other)
|
||||
if err != nil {
|
||||
t.Errorf("%s: decode %q: %v", tc.otherEncoding, tc.other, err)
|
||||
continue
|
||||
}
|
||||
if s != tc.utf8 {
|
||||
t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.utf8)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEncode(t *testing.T) {
|
||||
testCases := append(testCases, []testCase{
|
||||
// Use Go-style replacement.
|
||||
{"Rés\xe1\x80umé", "Rés\ufffd\ufffdumé", "utf8"},
|
||||
// U+0144 LATIN SMALL LETTER N WITH ACUTE not supported by encoding.
|
||||
{"Gdańsk", "Gdańsk", "ISO-8859-11"},
|
||||
{"\ufffd", "�", "ISO-8859-11"},
|
||||
{"a\xe1\x80b", "a��b", "ISO-8859-11"},
|
||||
}...)
|
||||
for _, tc := range testCases {
|
||||
e, _ := Lookup(tc.otherEncoding)
|
||||
if e == nil {
|
||||
t.Errorf("%s: not found", tc.otherEncoding)
|
||||
continue
|
||||
}
|
||||
s, err := transformString(e.NewEncoder(), tc.utf8)
|
||||
if err != nil {
|
||||
t.Errorf("%s: encode %q: %s", tc.otherEncoding, tc.utf8, err)
|
||||
continue
|
||||
}
|
||||
if s != tc.other {
|
||||
t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.other)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var sniffTestCases = []struct {
|
||||
filename, declared, want string
|
||||
}{
|
||||
{"HTTP-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
|
||||
{"UTF-16LE-BOM.html", "", "utf-16le"},
|
||||
{"UTF-16BE-BOM.html", "", "utf-16be"},
|
||||
{"meta-content-attribute.html", "text/html", "iso-8859-15"},
|
||||
{"meta-charset-attribute.html", "text/html", "iso-8859-15"},
|
||||
{"No-encoding-declaration.html", "text/html", "utf-8"},
|
||||
{"HTTP-vs-UTF-8-BOM.html", "text/html; charset=iso-8859-15", "utf-8"},
|
||||
{"HTTP-vs-meta-content.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
|
||||
{"HTTP-vs-meta-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
|
||||
{"UTF-8-BOM-vs-meta-content.html", "text/html", "utf-8"},
|
||||
{"UTF-8-BOM-vs-meta-charset.html", "text/html", "utf-8"},
|
||||
}
|
||||
|
||||
func TestSniff(t *testing.T) {
|
||||
switch runtime.GOOS {
|
||||
case "nacl": // platforms that don't permit direct file system access
|
||||
t.Skipf("not supported on %q", runtime.GOOS)
|
||||
}
|
||||
|
||||
for _, tc := range sniffTestCases {
|
||||
content, err := ioutil.ReadFile("testdata/" + tc.filename)
|
||||
if err != nil {
|
||||
t.Errorf("%s: error reading file: %v", tc.filename, err)
|
||||
continue
|
||||
}
|
||||
|
||||
_, name, _ := DetermineEncoding(content, tc.declared)
|
||||
if name != tc.want {
|
||||
t.Errorf("%s: got %q, want %q", tc.filename, name, tc.want)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestReader(t *testing.T) {
|
||||
switch runtime.GOOS {
|
||||
case "nacl": // platforms that don't permit direct file system access
|
||||
t.Skipf("not supported on %q", runtime.GOOS)
|
||||
}
|
||||
|
||||
for _, tc := range sniffTestCases {
|
||||
content, err := ioutil.ReadFile("testdata/" + tc.filename)
|
||||
if err != nil {
|
||||
t.Errorf("%s: error reading file: %v", tc.filename, err)
|
||||
continue
|
||||
}
|
||||
|
||||
r, err := NewReader(bytes.NewReader(content), tc.declared)
|
||||
if err != nil {
|
||||
t.Errorf("%s: error creating reader: %v", tc.filename, err)
|
||||
continue
|
||||
}
|
||||
|
||||
got, err := ioutil.ReadAll(r)
|
||||
if err != nil {
|
||||
t.Errorf("%s: error reading from charset.NewReader: %v", tc.filename, err)
|
||||
continue
|
||||
}
|
||||
|
||||
e, _ := Lookup(tc.want)
|
||||
want, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader(content), e.NewDecoder()))
|
||||
if err != nil {
|
||||
t.Errorf("%s: error decoding with hard-coded charset name: %v", tc.filename, err)
|
||||
continue
|
||||
}
|
||||
|
||||
if !bytes.Equal(got, want) {
|
||||
t.Errorf("%s: got %q, want %q", tc.filename, got, want)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var metaTestCases = []struct {
|
||||
meta, want string
|
||||
}{
|
||||
{"", ""},
|
||||
{"text/html", ""},
|
||||
{"text/html; charset utf-8", ""},
|
||||
{"text/html; charset=latin-2", "latin-2"},
|
||||
{"text/html; charset; charset = utf-8", "utf-8"},
|
||||
{`charset="big5"`, "big5"},
|
||||
{"charset='shift_jis'", "shift_jis"},
|
||||
}
|
||||
|
||||
func TestFromMeta(t *testing.T) {
|
||||
for _, tc := range metaTestCases {
|
||||
got := fromMetaElement(tc.meta)
|
||||
if got != tc.want {
|
||||
t.Errorf("%q: got %q, want %q", tc.meta, got, tc.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestXML(t *testing.T) {
|
||||
const s = "<?xml version=\"1.0\" encoding=\"windows-1252\"?><a><Word>r\xe9sum\xe9</Word></a>"
|
||||
|
||||
d := xml.NewDecoder(strings.NewReader(s))
|
||||
d.CharsetReader = NewReaderLabel
|
||||
|
||||
var a struct {
|
||||
Word string
|
||||
}
|
||||
err := d.Decode(&a)
|
||||
if err != nil {
|
||||
t.Fatalf("Decode: %v", err)
|
||||
}
|
||||
|
||||
want := "résumé"
|
||||
if a.Word != want {
|
||||
t.Errorf("got %q, want %q", a.Word, want)
|
||||
}
|
||||
}
|
||||
2
vendor/golang.org/x/net/html/const.go
generated
vendored
2
vendor/golang.org/x/net/html/const.go
generated
vendored
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
package html
|
||||
|
||||
// Section 12.2.3.2 of the HTML5 specification says "The following elements
|
||||
// Section 12.2.4.2 of the HTML5 specification says "The following elements
|
||||
// have varying levels of special parsing rules".
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
|
||||
var isSpecialElementMap = map[string]bool{
|
||||
|
|
|
|||
8
vendor/golang.org/x/net/html/doc.go
generated
vendored
8
vendor/golang.org/x/net/html/doc.go
generated
vendored
|
|
@ -49,18 +49,18 @@ call to Next. For example, to extract an HTML page's anchor text:
|
|||
for {
|
||||
tt := z.Next()
|
||||
switch tt {
|
||||
case ErrorToken:
|
||||
case html.ErrorToken:
|
||||
return z.Err()
|
||||
case TextToken:
|
||||
case html.TextToken:
|
||||
if depth > 0 {
|
||||
// emitBytes should copy the []byte it receives,
|
||||
// if it doesn't process it immediately.
|
||||
emitBytes(z.Text())
|
||||
}
|
||||
case StartTagToken, EndTagToken:
|
||||
case html.StartTagToken, html.EndTagToken:
|
||||
tn, _ := z.TagName()
|
||||
if len(tn) == 1 && tn[0] == 'a' {
|
||||
if tt == StartTagToken {
|
||||
if tt == html.StartTagToken {
|
||||
depth++
|
||||
} else {
|
||||
depth--
|
||||
|
|
|
|||
4154
vendor/golang.org/x/net/html/entity.go
generated
vendored
4154
vendor/golang.org/x/net/html/entity.go
generated
vendored
File diff suppressed because it is too large
Load diff
29
vendor/golang.org/x/net/html/entity_test.go
generated
vendored
29
vendor/golang.org/x/net/html/entity_test.go
generated
vendored
|
|
@ -1,29 +0,0 @@
|
|||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func TestEntityLength(t *testing.T) {
|
||||
// We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
|
||||
// The +1 comes from the leading "&". This property implies that the length of
|
||||
// unescaped text is <= the length of escaped text.
|
||||
for k, v := range entity {
|
||||
if 1+len(k) < utf8.RuneLen(v) {
|
||||
t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v))
|
||||
}
|
||||
if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' {
|
||||
t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon)
|
||||
}
|
||||
}
|
||||
for k, v := range entity2 {
|
||||
if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) {
|
||||
t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1]))
|
||||
}
|
||||
}
|
||||
}
|
||||
97
vendor/golang.org/x/net/html/escape_test.go
generated
vendored
97
vendor/golang.org/x/net/html/escape_test.go
generated
vendored
|
|
@ -1,97 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import "testing"
|
||||
|
||||
type unescapeTest struct {
|
||||
// A short description of the test case.
|
||||
desc string
|
||||
// The HTML text.
|
||||
html string
|
||||
// The unescaped text.
|
||||
unescaped string
|
||||
}
|
||||
|
||||
var unescapeTests = []unescapeTest{
|
||||
// Handle no entities.
|
||||
{
|
||||
"copy",
|
||||
"A\ttext\nstring",
|
||||
"A\ttext\nstring",
|
||||
},
|
||||
// Handle simple named entities.
|
||||
{
|
||||
"simple",
|
||||
"& > <",
|
||||
"& > <",
|
||||
},
|
||||
// Handle hitting the end of the string.
|
||||
{
|
||||
"stringEnd",
|
||||
"& &",
|
||||
"& &",
|
||||
},
|
||||
// Handle entities with two codepoints.
|
||||
{
|
||||
"multiCodepoint",
|
||||
"text ⋛︀ blah",
|
||||
"text \u22db\ufe00 blah",
|
||||
},
|
||||
// Handle decimal numeric entities.
|
||||
{
|
||||
"decimalEntity",
|
||||
"Delta = Δ ",
|
||||
"Delta = Δ ",
|
||||
},
|
||||
// Handle hexadecimal numeric entities.
|
||||
{
|
||||
"hexadecimalEntity",
|
||||
"Lambda = λ = λ ",
|
||||
"Lambda = λ = λ ",
|
||||
},
|
||||
// Handle numeric early termination.
|
||||
{
|
||||
"numericEnds",
|
||||
"&# &#x €43 © = ©f = ©",
|
||||
"&# &#x €43 © = ©f = ©",
|
||||
},
|
||||
// Handle numeric ISO-8859-1 entity replacements.
|
||||
{
|
||||
"numericReplacements",
|
||||
"Footnote‡",
|
||||
"Footnote‡",
|
||||
},
|
||||
}
|
||||
|
||||
func TestUnescape(t *testing.T) {
|
||||
for _, tt := range unescapeTests {
|
||||
unescaped := UnescapeString(tt.html)
|
||||
if unescaped != tt.unescaped {
|
||||
t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnescapeEscape(t *testing.T) {
|
||||
ss := []string{
|
||||
``,
|
||||
`abc def`,
|
||||
`a & b`,
|
||||
`a&b`,
|
||||
`a & b`,
|
||||
`"`,
|
||||
`"`,
|
||||
`"<&>"`,
|
||||
`"<&>"`,
|
||||
`3&5==1 && 0<1, "0<1", a+acute=á`,
|
||||
`The special characters are: <, >, &, ' and "`,
|
||||
}
|
||||
for _, s := range ss {
|
||||
if got := UnescapeString(EscapeString(s)); got != s {
|
||||
t.Errorf("got %q want %q", got, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
40
vendor/golang.org/x/net/html/example_test.go
generated
vendored
40
vendor/golang.org/x/net/html/example_test.go
generated
vendored
|
|
@ -1,40 +0,0 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// This example demonstrates parsing HTML data and walking the resulting tree.
|
||||
package html_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
func ExampleParse() {
|
||||
s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`
|
||||
doc, err := html.Parse(strings.NewReader(s))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
var f func(*html.Node)
|
||||
f = func(n *html.Node) {
|
||||
if n.Type == html.ElementNode && n.Data == "a" {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "href" {
|
||||
fmt.Println(a.Val)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
f(c)
|
||||
}
|
||||
}
|
||||
f(doc)
|
||||
// Output:
|
||||
// foo
|
||||
// /bar/baz
|
||||
}
|
||||
6
vendor/golang.org/x/net/html/foreign.go
generated
vendored
6
vendor/golang.org/x/net/html/foreign.go
generated
vendored
|
|
@ -67,7 +67,7 @@ func mathMLTextIntegrationPoint(n *Node) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.5.5.
|
||||
// Section 12.2.6.5.
|
||||
var breakout = map[string]bool{
|
||||
"b": true,
|
||||
"big": true,
|
||||
|
|
@ -115,7 +115,7 @@ var breakout = map[string]bool{
|
|||
"var": true,
|
||||
}
|
||||
|
||||
// Section 12.2.5.5.
|
||||
// Section 12.2.6.5.
|
||||
var svgTagNameAdjustments = map[string]string{
|
||||
"altglyph": "altGlyph",
|
||||
"altglyphdef": "altGlyphDef",
|
||||
|
|
@ -155,7 +155,7 @@ var svgTagNameAdjustments = map[string]string{
|
|||
"textpath": "textPath",
|
||||
}
|
||||
|
||||
// Section 12.2.5.1
|
||||
// Section 12.2.6.1
|
||||
var mathMLAttributeAdjustments = map[string]string{
|
||||
"definitionurl": "definitionURL",
|
||||
}
|
||||
|
|
|
|||
33
vendor/golang.org/x/net/html/node.go
generated
vendored
33
vendor/golang.org/x/net/html/node.go
generated
vendored
|
|
@ -21,9 +21,10 @@ const (
|
|||
scopeMarkerNode
|
||||
)
|
||||
|
||||
// Section 12.2.3.3 says "scope markers are inserted when entering applet
|
||||
// elements, buttons, object elements, marquees, table cells, and table
|
||||
// captions, and are used to prevent formatting from 'leaking'".
|
||||
// Section 12.2.4.3 says "The markers are inserted when entering applet,
|
||||
// object, marquee, template, td, th, and caption elements, and are used
|
||||
// to prevent formatting from "leaking" into applet, object, marquee,
|
||||
// template, td, th, and caption elements".
|
||||
var scopeMarker = Node{Type: scopeMarkerNode}
|
||||
|
||||
// A Node consists of a NodeType and some Data (tag name for element nodes,
|
||||
|
|
@ -173,6 +174,16 @@ func (s *nodeStack) index(n *Node) int {
|
|||
return -1
|
||||
}
|
||||
|
||||
// contains returns whether a is within s.
|
||||
func (s *nodeStack) contains(a atom.Atom) bool {
|
||||
for _, n := range *s {
|
||||
if n.DataAtom == a {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// insert inserts a node at the given index.
|
||||
func (s *nodeStack) insert(i int, n *Node) {
|
||||
(*s) = append(*s, nil)
|
||||
|
|
@ -191,3 +202,19 @@ func (s *nodeStack) remove(n *Node) {
|
|||
(*s)[j] = nil
|
||||
*s = (*s)[:j]
|
||||
}
|
||||
|
||||
type insertionModeStack []insertionMode
|
||||
|
||||
func (s *insertionModeStack) pop() (im insertionMode) {
|
||||
i := len(*s)
|
||||
im = (*s)[i-1]
|
||||
*s = (*s)[:i-1]
|
||||
return im
|
||||
}
|
||||
|
||||
func (s *insertionModeStack) top() insertionMode {
|
||||
if i := len(*s); i > 0 {
|
||||
return (*s)[i-1]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
|
|
|||
146
vendor/golang.org/x/net/html/node_test.go
generated
vendored
146
vendor/golang.org/x/net/html/node_test.go
generated
vendored
|
|
@ -1,146 +0,0 @@
|
|||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// checkTreeConsistency checks that a node and its descendants are all
|
||||
// consistent in their parent/child/sibling relationships.
|
||||
func checkTreeConsistency(n *Node) error {
|
||||
return checkTreeConsistency1(n, 0)
|
||||
}
|
||||
|
||||
func checkTreeConsistency1(n *Node, depth int) error {
|
||||
if depth == 1e4 {
|
||||
return fmt.Errorf("html: tree looks like it contains a cycle")
|
||||
}
|
||||
if err := checkNodeConsistency(n); err != nil {
|
||||
return err
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if err := checkTreeConsistency1(c, depth+1); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkNodeConsistency checks that a node's parent/child/sibling relationships
|
||||
// are consistent.
|
||||
func checkNodeConsistency(n *Node) error {
|
||||
if n == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
nParent := 0
|
||||
for p := n.Parent; p != nil; p = p.Parent {
|
||||
nParent++
|
||||
if nParent == 1e4 {
|
||||
return fmt.Errorf("html: parent list looks like an infinite loop")
|
||||
}
|
||||
}
|
||||
|
||||
nForward := 0
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
nForward++
|
||||
if nForward == 1e6 {
|
||||
return fmt.Errorf("html: forward list of children looks like an infinite loop")
|
||||
}
|
||||
if c.Parent != n {
|
||||
return fmt.Errorf("html: inconsistent child/parent relationship")
|
||||
}
|
||||
}
|
||||
|
||||
nBackward := 0
|
||||
for c := n.LastChild; c != nil; c = c.PrevSibling {
|
||||
nBackward++
|
||||
if nBackward == 1e6 {
|
||||
return fmt.Errorf("html: backward list of children looks like an infinite loop")
|
||||
}
|
||||
if c.Parent != n {
|
||||
return fmt.Errorf("html: inconsistent child/parent relationship")
|
||||
}
|
||||
}
|
||||
|
||||
if n.Parent != nil {
|
||||
if n.Parent == n {
|
||||
return fmt.Errorf("html: inconsistent parent relationship")
|
||||
}
|
||||
if n.Parent == n.FirstChild {
|
||||
return fmt.Errorf("html: inconsistent parent/first relationship")
|
||||
}
|
||||
if n.Parent == n.LastChild {
|
||||
return fmt.Errorf("html: inconsistent parent/last relationship")
|
||||
}
|
||||
if n.Parent == n.PrevSibling {
|
||||
return fmt.Errorf("html: inconsistent parent/prev relationship")
|
||||
}
|
||||
if n.Parent == n.NextSibling {
|
||||
return fmt.Errorf("html: inconsistent parent/next relationship")
|
||||
}
|
||||
|
||||
parentHasNAsAChild := false
|
||||
for c := n.Parent.FirstChild; c != nil; c = c.NextSibling {
|
||||
if c == n {
|
||||
parentHasNAsAChild = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !parentHasNAsAChild {
|
||||
return fmt.Errorf("html: inconsistent parent/child relationship")
|
||||
}
|
||||
}
|
||||
|
||||
if n.PrevSibling != nil && n.PrevSibling.NextSibling != n {
|
||||
return fmt.Errorf("html: inconsistent prev/next relationship")
|
||||
}
|
||||
if n.NextSibling != nil && n.NextSibling.PrevSibling != n {
|
||||
return fmt.Errorf("html: inconsistent next/prev relationship")
|
||||
}
|
||||
|
||||
if (n.FirstChild == nil) != (n.LastChild == nil) {
|
||||
return fmt.Errorf("html: inconsistent first/last relationship")
|
||||
}
|
||||
if n.FirstChild != nil && n.FirstChild == n.LastChild {
|
||||
// We have a sole child.
|
||||
if n.FirstChild.PrevSibling != nil || n.FirstChild.NextSibling != nil {
|
||||
return fmt.Errorf("html: inconsistent sole child's sibling relationship")
|
||||
}
|
||||
}
|
||||
|
||||
seen := map[*Node]bool{}
|
||||
|
||||
var last *Node
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if seen[c] {
|
||||
return fmt.Errorf("html: inconsistent repeated child")
|
||||
}
|
||||
seen[c] = true
|
||||
last = c
|
||||
}
|
||||
if last != n.LastChild {
|
||||
return fmt.Errorf("html: inconsistent last relationship")
|
||||
}
|
||||
|
||||
var first *Node
|
||||
for c := n.LastChild; c != nil; c = c.PrevSibling {
|
||||
if !seen[c] {
|
||||
return fmt.Errorf("html: inconsistent missing child")
|
||||
}
|
||||
delete(seen, c)
|
||||
first = c
|
||||
}
|
||||
if first != n.FirstChild {
|
||||
return fmt.Errorf("html: inconsistent first relationship")
|
||||
}
|
||||
|
||||
if len(seen) != 0 {
|
||||
return fmt.Errorf("html: inconsistent forwards/backwards child list")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
361
vendor/golang.org/x/net/html/parse.go
generated
vendored
361
vendor/golang.org/x/net/html/parse.go
generated
vendored
|
|
@ -25,20 +25,22 @@ type parser struct {
|
|||
hasSelfClosingToken bool
|
||||
// doc is the document root element.
|
||||
doc *Node
|
||||
// The stack of open elements (section 12.2.3.2) and active formatting
|
||||
// elements (section 12.2.3.3).
|
||||
// The stack of open elements (section 12.2.4.2) and active formatting
|
||||
// elements (section 12.2.4.3).
|
||||
oe, afe nodeStack
|
||||
// Element pointers (section 12.2.3.4).
|
||||
// Element pointers (section 12.2.4.4).
|
||||
head, form *Node
|
||||
// Other parsing state flags (section 12.2.3.5).
|
||||
// Other parsing state flags (section 12.2.4.5).
|
||||
scripting, framesetOK bool
|
||||
// The stack of template insertion modes
|
||||
templateStack insertionModeStack
|
||||
// im is the current insertion mode.
|
||||
im insertionMode
|
||||
// originalIM is the insertion mode to go back to after completing a text
|
||||
// or inTableText insertion mode.
|
||||
originalIM insertionMode
|
||||
// fosterParenting is whether new elements should be inserted according to
|
||||
// the foster parenting rules (section 12.2.5.3).
|
||||
// the foster parenting rules (section 12.2.6.1).
|
||||
fosterParenting bool
|
||||
// quirks is whether the parser is operating in "quirks mode."
|
||||
quirks bool
|
||||
|
|
@ -56,7 +58,7 @@ func (p *parser) top() *Node {
|
|||
return p.doc
|
||||
}
|
||||
|
||||
// Stop tags for use in popUntil. These come from section 12.2.3.2.
|
||||
// Stop tags for use in popUntil. These come from section 12.2.4.2.
|
||||
var (
|
||||
defaultScopeStopTags = map[string][]a.Atom{
|
||||
"": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
|
||||
|
|
@ -79,7 +81,7 @@ const (
|
|||
|
||||
// popUntil pops the stack of open elements at the highest element whose tag
|
||||
// is in matchTags, provided there is no higher element in the scope's stop
|
||||
// tags (as defined in section 12.2.3.2). It returns whether or not there was
|
||||
// tags (as defined in section 12.2.4.2). It returns whether or not there was
|
||||
// such an element. If there was not, popUntil leaves the stack unchanged.
|
||||
//
|
||||
// For example, the set of stop tags for table scope is: "html", "table". If
|
||||
|
|
@ -126,7 +128,7 @@ func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
|
|||
return -1
|
||||
}
|
||||
case tableScope:
|
||||
if tagAtom == a.Html || tagAtom == a.Table {
|
||||
if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
|
||||
return -1
|
||||
}
|
||||
case selectScope:
|
||||
|
|
@ -162,17 +164,17 @@ func (p *parser) clearStackToContext(s scope) {
|
|||
tagAtom := p.oe[i].DataAtom
|
||||
switch s {
|
||||
case tableScope:
|
||||
if tagAtom == a.Html || tagAtom == a.Table {
|
||||
if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
|
||||
p.oe = p.oe[:i+1]
|
||||
return
|
||||
}
|
||||
case tableRowScope:
|
||||
if tagAtom == a.Html || tagAtom == a.Tr {
|
||||
if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
|
||||
p.oe = p.oe[:i+1]
|
||||
return
|
||||
}
|
||||
case tableBodyScope:
|
||||
if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead {
|
||||
if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
|
||||
p.oe = p.oe[:i+1]
|
||||
return
|
||||
}
|
||||
|
|
@ -183,7 +185,7 @@ func (p *parser) clearStackToContext(s scope) {
|
|||
}
|
||||
|
||||
// generateImpliedEndTags pops nodes off the stack of open elements as long as
|
||||
// the top node has a tag name of dd, dt, li, option, optgroup, p, rp, or rt.
|
||||
// the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
|
||||
// If exceptions are specified, nodes with that name will not be popped off.
|
||||
func (p *parser) generateImpliedEndTags(exceptions ...string) {
|
||||
var i int
|
||||
|
|
@ -192,7 +194,7 @@ loop:
|
|||
n := p.oe[i]
|
||||
if n.Type == ElementNode {
|
||||
switch n.DataAtom {
|
||||
case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt:
|
||||
case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
|
||||
for _, except := range exceptions {
|
||||
if n.Data == except {
|
||||
break loop
|
||||
|
|
@ -207,6 +209,27 @@ loop:
|
|||
p.oe = p.oe[:i+1]
|
||||
}
|
||||
|
||||
// generateAllImpliedEndTags pops nodes off the stack of open elements as long as
|
||||
// the top node has a tag name of caption, colgroup, dd, div, dt, li, optgroup, option, p, rb,
|
||||
// rp, rt, rtc, span, tbody, td, tfoot, th, thead or tr.
|
||||
func (p *parser) generateAllImpliedEndTags() {
|
||||
var i int
|
||||
for i = len(p.oe) - 1; i >= 0; i-- {
|
||||
n := p.oe[i]
|
||||
if n.Type == ElementNode {
|
||||
switch n.DataAtom {
|
||||
// TODO: remove this divergence from the HTML5 spec
|
||||
case a.Caption, a.Colgroup, a.Dd, a.Div, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb,
|
||||
a.Rp, a.Rt, a.Rtc, a.Span, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
|
||||
continue
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
p.oe = p.oe[:i+1]
|
||||
}
|
||||
|
||||
// addChild adds a child node n to the top element, and pushes n onto the stack
|
||||
// of open elements if it is an element node.
|
||||
func (p *parser) addChild(n *Node) {
|
||||
|
|
@ -234,9 +257,9 @@ func (p *parser) shouldFosterParent() bool {
|
|||
}
|
||||
|
||||
// fosterParent adds a child node according to the foster parenting rules.
|
||||
// Section 12.2.5.3, "foster parenting".
|
||||
// Section 12.2.6.1, "foster parenting".
|
||||
func (p *parser) fosterParent(n *Node) {
|
||||
var table, parent, prev *Node
|
||||
var table, parent, prev, template *Node
|
||||
var i int
|
||||
for i = len(p.oe) - 1; i >= 0; i-- {
|
||||
if p.oe[i].DataAtom == a.Table {
|
||||
|
|
@ -245,6 +268,19 @@ func (p *parser) fosterParent(n *Node) {
|
|||
}
|
||||
}
|
||||
|
||||
var j int
|
||||
for j = len(p.oe) - 1; j >= 0; j-- {
|
||||
if p.oe[j].DataAtom == a.Template {
|
||||
template = p.oe[j]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if template != nil && (table == nil || j < i) {
|
||||
template.AppendChild(n)
|
||||
return
|
||||
}
|
||||
|
||||
if table == nil {
|
||||
// The foster parent is the html element.
|
||||
parent = p.oe[0]
|
||||
|
|
@ -304,7 +340,7 @@ func (p *parser) addElement() {
|
|||
})
|
||||
}
|
||||
|
||||
// Section 12.2.3.3.
|
||||
// Section 12.2.4.3.
|
||||
func (p *parser) addFormattingElement() {
|
||||
tagAtom, attr := p.tok.DataAtom, p.tok.Attr
|
||||
p.addElement()
|
||||
|
|
@ -351,7 +387,7 @@ findIdenticalElements:
|
|||
p.afe = append(p.afe, p.top())
|
||||
}
|
||||
|
||||
// Section 12.2.3.3.
|
||||
// Section 12.2.4.3.
|
||||
func (p *parser) clearActiveFormattingElements() {
|
||||
for {
|
||||
n := p.afe.pop()
|
||||
|
|
@ -361,7 +397,7 @@ func (p *parser) clearActiveFormattingElements() {
|
|||
}
|
||||
}
|
||||
|
||||
// Section 12.2.3.3.
|
||||
// Section 12.2.4.3.
|
||||
func (p *parser) reconstructActiveFormattingElements() {
|
||||
n := p.afe.top()
|
||||
if n == nil {
|
||||
|
|
@ -390,12 +426,12 @@ func (p *parser) reconstructActiveFormattingElements() {
|
|||
}
|
||||
}
|
||||
|
||||
// Section 12.2.4.
|
||||
// Section 12.2.5.
|
||||
func (p *parser) acknowledgeSelfClosingTag() {
|
||||
p.hasSelfClosingToken = false
|
||||
}
|
||||
|
||||
// An insertion mode (section 12.2.3.1) is the state transition function from
|
||||
// An insertion mode (section 12.2.4.1) is the state transition function from
|
||||
// a particular state in the HTML5 parser's state machine. It updates the
|
||||
// parser's fields depending on parser.tok (where ErrorToken means EOF).
|
||||
// It returns whether the token was consumed.
|
||||
|
|
@ -403,7 +439,7 @@ type insertionMode func(*parser) bool
|
|||
|
||||
// setOriginalIM sets the insertion mode to return to after completing a text or
|
||||
// inTableText insertion mode.
|
||||
// Section 12.2.3.1, "using the rules for".
|
||||
// Section 12.2.4.1, "using the rules for".
|
||||
func (p *parser) setOriginalIM() {
|
||||
if p.originalIM != nil {
|
||||
panic("html: bad parser state: originalIM was set twice")
|
||||
|
|
@ -411,18 +447,38 @@ func (p *parser) setOriginalIM() {
|
|||
p.originalIM = p.im
|
||||
}
|
||||
|
||||
// Section 12.2.3.1, "reset the insertion mode".
|
||||
// Section 12.2.4.1, "reset the insertion mode".
|
||||
func (p *parser) resetInsertionMode() {
|
||||
for i := len(p.oe) - 1; i >= 0; i-- {
|
||||
n := p.oe[i]
|
||||
if i == 0 && p.context != nil {
|
||||
last := i == 0
|
||||
if last && p.context != nil {
|
||||
n = p.context
|
||||
}
|
||||
|
||||
switch n.DataAtom {
|
||||
case a.Select:
|
||||
if !last {
|
||||
for ancestor, first := n, p.oe[0]; ancestor != first; {
|
||||
if ancestor == first {
|
||||
break
|
||||
}
|
||||
ancestor = p.oe[p.oe.index(ancestor)-1]
|
||||
switch ancestor.DataAtom {
|
||||
case a.Template:
|
||||
p.im = inSelectIM
|
||||
return
|
||||
case a.Table:
|
||||
p.im = inSelectInTableIM
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
p.im = inSelectIM
|
||||
case a.Td, a.Th:
|
||||
// TODO: remove this divergence from the HTML5 spec.
|
||||
//
|
||||
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
||||
p.im = inCellIM
|
||||
case a.Tr:
|
||||
p.im = inRowIM
|
||||
|
|
@ -434,25 +490,37 @@ func (p *parser) resetInsertionMode() {
|
|||
p.im = inColumnGroupIM
|
||||
case a.Table:
|
||||
p.im = inTableIM
|
||||
case a.Template:
|
||||
p.im = p.templateStack.top()
|
||||
case a.Head:
|
||||
p.im = inBodyIM
|
||||
// TODO: remove this divergence from the HTML5 spec.
|
||||
//
|
||||
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
||||
p.im = inHeadIM
|
||||
case a.Body:
|
||||
p.im = inBodyIM
|
||||
case a.Frameset:
|
||||
p.im = inFramesetIM
|
||||
case a.Html:
|
||||
p.im = beforeHeadIM
|
||||
if p.head == nil {
|
||||
p.im = beforeHeadIM
|
||||
} else {
|
||||
p.im = afterHeadIM
|
||||
}
|
||||
default:
|
||||
if last {
|
||||
p.im = inBodyIM
|
||||
return
|
||||
}
|
||||
continue
|
||||
}
|
||||
return
|
||||
}
|
||||
p.im = inBodyIM
|
||||
}
|
||||
|
||||
const whitespace = " \t\r\n\f"
|
||||
|
||||
// Section 12.2.5.4.1.
|
||||
// Section 12.2.6.4.1.
|
||||
func initialIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case TextToken:
|
||||
|
|
@ -479,7 +547,7 @@ func initialIM(p *parser) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.2.
|
||||
// Section 12.2.6.4.2.
|
||||
func beforeHTMLIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case DoctypeToken:
|
||||
|
|
@ -517,7 +585,7 @@ func beforeHTMLIM(p *parser) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.3.
|
||||
// Section 12.2.6.4.3.
|
||||
func beforeHeadIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case TextToken:
|
||||
|
|
@ -560,7 +628,7 @@ func beforeHeadIM(p *parser) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.4.
|
||||
// Section 12.2.6.4.4.
|
||||
func inHeadIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case TextToken:
|
||||
|
|
@ -590,19 +658,36 @@ func inHeadIM(p *parser) bool {
|
|||
case a.Head:
|
||||
// Ignore the token.
|
||||
return true
|
||||
case a.Template:
|
||||
p.addElement()
|
||||
p.afe = append(p.afe, &scopeMarker)
|
||||
p.framesetOK = false
|
||||
p.im = inTemplateIM
|
||||
p.templateStack = append(p.templateStack, inTemplateIM)
|
||||
return true
|
||||
}
|
||||
case EndTagToken:
|
||||
switch p.tok.DataAtom {
|
||||
case a.Head:
|
||||
n := p.oe.pop()
|
||||
if n.DataAtom != a.Head {
|
||||
panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
|
||||
}
|
||||
p.oe.pop()
|
||||
p.im = afterHeadIM
|
||||
return true
|
||||
case a.Body, a.Html, a.Br:
|
||||
p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
|
||||
return false
|
||||
case a.Template:
|
||||
if !p.oe.contains(a.Template) {
|
||||
return true
|
||||
}
|
||||
p.generateAllImpliedEndTags()
|
||||
if n := p.oe.top(); n.DataAtom != a.Template {
|
||||
return true
|
||||
}
|
||||
p.popUntil(defaultScope, a.Template)
|
||||
p.clearActiveFormattingElements()
|
||||
p.templateStack.pop()
|
||||
p.resetInsertionMode()
|
||||
return true
|
||||
default:
|
||||
// Ignore the token.
|
||||
return true
|
||||
|
|
@ -622,7 +707,7 @@ func inHeadIM(p *parser) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.6.
|
||||
// Section 12.2.6.4.6.
|
||||
func afterHeadIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case TextToken:
|
||||
|
|
@ -648,7 +733,7 @@ func afterHeadIM(p *parser) bool {
|
|||
p.addElement()
|
||||
p.im = inFramesetIM
|
||||
return true
|
||||
case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
|
||||
case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
|
||||
p.oe = append(p.oe, p.head)
|
||||
defer p.oe.remove(p.head)
|
||||
return inHeadIM(p)
|
||||
|
|
@ -660,6 +745,8 @@ func afterHeadIM(p *parser) bool {
|
|||
switch p.tok.DataAtom {
|
||||
case a.Body, a.Html, a.Br:
|
||||
// Drop down to creating an implied <body> tag.
|
||||
case a.Template:
|
||||
return inHeadIM(p)
|
||||
default:
|
||||
// Ignore the token.
|
||||
return true
|
||||
|
|
@ -697,7 +784,7 @@ func copyAttributes(dst *Node, src Token) {
|
|||
}
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.7.
|
||||
// Section 12.2.6.4.7.
|
||||
func inBodyIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case TextToken:
|
||||
|
|
@ -727,10 +814,16 @@ func inBodyIM(p *parser) bool {
|
|||
case StartTagToken:
|
||||
switch p.tok.DataAtom {
|
||||
case a.Html:
|
||||
if p.oe.contains(a.Template) {
|
||||
return true
|
||||
}
|
||||
copyAttributes(p.oe[0], p.tok)
|
||||
case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
|
||||
case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
|
||||
return inHeadIM(p)
|
||||
case a.Body:
|
||||
if p.oe.contains(a.Template) {
|
||||
return true
|
||||
}
|
||||
if len(p.oe) >= 2 {
|
||||
body := p.oe[1]
|
||||
if body.Type == ElementNode && body.DataAtom == a.Body {
|
||||
|
|
@ -767,7 +860,7 @@ func inBodyIM(p *parser) bool {
|
|||
// The newline, if any, will be dealt with by the TextToken case.
|
||||
p.framesetOK = false
|
||||
case a.Form:
|
||||
if p.form == nil {
|
||||
if p.oe.contains(a.Template) || p.form == nil {
|
||||
p.popUntil(buttonScope, a.P)
|
||||
p.addElement()
|
||||
p.form = p.top()
|
||||
|
|
@ -952,11 +1045,16 @@ func inBodyIM(p *parser) bool {
|
|||
}
|
||||
p.reconstructActiveFormattingElements()
|
||||
p.addElement()
|
||||
case a.Rp, a.Rt:
|
||||
case a.Rb, a.Rtc:
|
||||
if p.elementInScope(defaultScope, a.Ruby) {
|
||||
p.generateImpliedEndTags()
|
||||
}
|
||||
p.addElement()
|
||||
case a.Rp, a.Rt:
|
||||
if p.elementInScope(defaultScope, a.Ruby) {
|
||||
p.generateImpliedEndTags("rtc")
|
||||
}
|
||||
p.addElement()
|
||||
case a.Math, a.Svg:
|
||||
p.reconstructActiveFormattingElements()
|
||||
if p.tok.DataAtom == a.Math {
|
||||
|
|
@ -972,7 +1070,13 @@ func inBodyIM(p *parser) bool {
|
|||
p.acknowledgeSelfClosingTag()
|
||||
}
|
||||
return true
|
||||
case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
|
||||
case a.Frame:
|
||||
// TODO: remove this divergence from the HTML5 spec.
|
||||
if p.oe.contains(a.Template) {
|
||||
p.addElement()
|
||||
return true
|
||||
}
|
||||
case a.Caption, a.Col, a.Colgroup, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
|
||||
// Ignore the token.
|
||||
default:
|
||||
p.reconstructActiveFormattingElements()
|
||||
|
|
@ -993,15 +1097,28 @@ func inBodyIM(p *parser) bool {
|
|||
case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
|
||||
p.popUntil(defaultScope, p.tok.DataAtom)
|
||||
case a.Form:
|
||||
node := p.form
|
||||
p.form = nil
|
||||
i := p.indexOfElementInScope(defaultScope, a.Form)
|
||||
if node == nil || i == -1 || p.oe[i] != node {
|
||||
// Ignore the token.
|
||||
return true
|
||||
if p.oe.contains(a.Template) {
|
||||
if !p.oe.contains(a.Form) {
|
||||
// Ignore the token.
|
||||
return true
|
||||
}
|
||||
p.generateImpliedEndTags()
|
||||
if p.tok.DataAtom == a.Form {
|
||||
// Ignore the token.
|
||||
return true
|
||||
}
|
||||
p.popUntil(defaultScope, a.Form)
|
||||
} else {
|
||||
node := p.form
|
||||
p.form = nil
|
||||
i := p.indexOfElementInScope(defaultScope, a.Form)
|
||||
if node == nil || i == -1 || p.oe[i] != node {
|
||||
// Ignore the token.
|
||||
return true
|
||||
}
|
||||
p.generateImpliedEndTags()
|
||||
p.oe.remove(node)
|
||||
}
|
||||
p.generateImpliedEndTags()
|
||||
p.oe.remove(node)
|
||||
case a.P:
|
||||
if !p.elementInScope(buttonScope, a.P) {
|
||||
p.parseImpliedToken(StartTagToken, a.P, a.P.String())
|
||||
|
|
@ -1022,6 +1139,8 @@ func inBodyIM(p *parser) bool {
|
|||
case a.Br:
|
||||
p.tok.Type = StartTagToken
|
||||
return false
|
||||
case a.Template:
|
||||
return inHeadIM(p)
|
||||
default:
|
||||
p.inBodyEndTagOther(p.tok.DataAtom)
|
||||
}
|
||||
|
|
@ -1030,6 +1149,21 @@ func inBodyIM(p *parser) bool {
|
|||
Type: CommentNode,
|
||||
Data: p.tok.Data,
|
||||
})
|
||||
case ErrorToken:
|
||||
// TODO: remove this divergence from the HTML5 spec.
|
||||
if len(p.templateStack) > 0 {
|
||||
p.im = inTemplateIM
|
||||
return false
|
||||
} else {
|
||||
for _, e := range p.oe {
|
||||
switch e.DataAtom {
|
||||
case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
|
||||
a.Thead, a.Tr, a.Body, a.Html:
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
|
|
@ -1135,6 +1269,12 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
|
|||
switch commonAncestor.DataAtom {
|
||||
case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
|
||||
p.fosterParent(lastNode)
|
||||
case a.Template:
|
||||
// TODO: remove namespace checking
|
||||
if commonAncestor.Namespace == "html" {
|
||||
commonAncestor = commonAncestor.LastChild
|
||||
}
|
||||
fallthrough
|
||||
default:
|
||||
commonAncestor.AppendChild(lastNode)
|
||||
}
|
||||
|
|
@ -1160,7 +1300,7 @@ func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
|
|||
}
|
||||
|
||||
// inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
|
||||
// "Any other end tag" handling from 12.2.5.5 The rules for parsing tokens in foreign content
|
||||
// "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
|
||||
func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
|
||||
for i := len(p.oe) - 1; i >= 0; i-- {
|
||||
|
|
@ -1174,7 +1314,7 @@ func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
|
|||
}
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.8.
|
||||
// Section 12.2.6.4.8.
|
||||
func textIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case ErrorToken:
|
||||
|
|
@ -1203,7 +1343,7 @@ func textIM(p *parser) bool {
|
|||
return p.tok.Type == EndTagToken
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.9.
|
||||
// Section 12.2.6.4.9.
|
||||
func inTableIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case ErrorToken:
|
||||
|
|
@ -1249,7 +1389,7 @@ func inTableIM(p *parser) bool {
|
|||
}
|
||||
// Ignore the token.
|
||||
return true
|
||||
case a.Style, a.Script:
|
||||
case a.Style, a.Script, a.Template:
|
||||
return inHeadIM(p)
|
||||
case a.Input:
|
||||
for _, t := range p.tok.Attr {
|
||||
|
|
@ -1261,7 +1401,7 @@ func inTableIM(p *parser) bool {
|
|||
}
|
||||
// Otherwise drop down to the default action.
|
||||
case a.Form:
|
||||
if p.form != nil {
|
||||
if p.oe.contains(a.Template) || p.form != nil {
|
||||
// Ignore the token.
|
||||
return true
|
||||
}
|
||||
|
|
@ -1291,6 +1431,8 @@ func inTableIM(p *parser) bool {
|
|||
case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
|
||||
// Ignore the token.
|
||||
return true
|
||||
case a.Template:
|
||||
return inHeadIM(p)
|
||||
}
|
||||
case CommentToken:
|
||||
p.addChild(&Node{
|
||||
|
|
@ -1309,7 +1451,7 @@ func inTableIM(p *parser) bool {
|
|||
return inBodyIM(p)
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.11.
|
||||
// Section 12.2.6.4.11.
|
||||
func inCaptionIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case StartTagToken:
|
||||
|
|
@ -1355,7 +1497,7 @@ func inCaptionIM(p *parser) bool {
|
|||
return inBodyIM(p)
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.12.
|
||||
// Section 12.2.6.4.12.
|
||||
func inColumnGroupIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case TextToken:
|
||||
|
|
@ -1386,11 +1528,13 @@ func inColumnGroupIM(p *parser) bool {
|
|||
p.oe.pop()
|
||||
p.acknowledgeSelfClosingTag()
|
||||
return true
|
||||
case a.Template:
|
||||
return inHeadIM(p)
|
||||
}
|
||||
case EndTagToken:
|
||||
switch p.tok.DataAtom {
|
||||
case a.Colgroup:
|
||||
if p.oe.top().DataAtom != a.Html {
|
||||
if p.oe.top().DataAtom == a.Colgroup {
|
||||
p.oe.pop()
|
||||
p.im = inTableIM
|
||||
}
|
||||
|
|
@ -1398,17 +1542,19 @@ func inColumnGroupIM(p *parser) bool {
|
|||
case a.Col:
|
||||
// Ignore the token.
|
||||
return true
|
||||
case a.Template:
|
||||
return inHeadIM(p)
|
||||
}
|
||||
}
|
||||
if p.oe.top().DataAtom != a.Html {
|
||||
p.oe.pop()
|
||||
p.im = inTableIM
|
||||
return false
|
||||
if p.oe.top().DataAtom != a.Colgroup {
|
||||
return true
|
||||
}
|
||||
return true
|
||||
p.oe.pop()
|
||||
p.im = inTableIM
|
||||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.13.
|
||||
// Section 12.2.6.4.13.
|
||||
func inTableBodyIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case StartTagToken:
|
||||
|
|
@ -1460,7 +1606,7 @@ func inTableBodyIM(p *parser) bool {
|
|||
return inTableIM(p)
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.14.
|
||||
// Section 12.2.6.4.14.
|
||||
func inRowIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case StartTagToken:
|
||||
|
|
@ -1511,7 +1657,7 @@ func inRowIM(p *parser) bool {
|
|||
return inTableIM(p)
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.15.
|
||||
// Section 12.2.6.4.15.
|
||||
func inCellIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case StartTagToken:
|
||||
|
|
@ -1560,7 +1706,7 @@ func inCellIM(p *parser) bool {
|
|||
return inBodyIM(p)
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.16.
|
||||
// Section 12.2.6.4.16.
|
||||
func inSelectIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case ErrorToken:
|
||||
|
|
@ -1597,7 +1743,7 @@ func inSelectIM(p *parser) bool {
|
|||
p.tokenizer.NextIsNotRawText()
|
||||
// Ignore the token.
|
||||
return true
|
||||
case a.Script:
|
||||
case a.Script, a.Template:
|
||||
return inHeadIM(p)
|
||||
}
|
||||
case EndTagToken:
|
||||
|
|
@ -1618,6 +1764,8 @@ func inSelectIM(p *parser) bool {
|
|||
if p.popUntil(selectScope, a.Select) {
|
||||
p.resetInsertionMode()
|
||||
}
|
||||
case a.Template:
|
||||
return inHeadIM(p)
|
||||
}
|
||||
case CommentToken:
|
||||
p.addChild(&Node{
|
||||
|
|
@ -1632,7 +1780,7 @@ func inSelectIM(p *parser) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.17.
|
||||
// Section 12.2.6.4.17.
|
||||
func inSelectInTableIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case StartTagToken, EndTagToken:
|
||||
|
|
@ -1650,7 +1798,62 @@ func inSelectInTableIM(p *parser) bool {
|
|||
return inSelectIM(p)
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.18.
|
||||
// Section 12.2.6.4.18.
|
||||
func inTemplateIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case TextToken, CommentToken, DoctypeToken:
|
||||
return inBodyIM(p)
|
||||
case StartTagToken:
|
||||
switch p.tok.DataAtom {
|
||||
case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
|
||||
return inHeadIM(p)
|
||||
case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
|
||||
p.templateStack.pop()
|
||||
p.templateStack = append(p.templateStack, inTableIM)
|
||||
p.im = inTableIM
|
||||
return false
|
||||
case a.Col:
|
||||
p.templateStack.pop()
|
||||
p.templateStack = append(p.templateStack, inColumnGroupIM)
|
||||
p.im = inColumnGroupIM
|
||||
return false
|
||||
case a.Tr:
|
||||
p.templateStack.pop()
|
||||
p.templateStack = append(p.templateStack, inTableBodyIM)
|
||||
p.im = inTableBodyIM
|
||||
return false
|
||||
case a.Td, a.Th:
|
||||
p.templateStack.pop()
|
||||
p.templateStack = append(p.templateStack, inRowIM)
|
||||
p.im = inRowIM
|
||||
return false
|
||||
default:
|
||||
p.templateStack.pop()
|
||||
p.templateStack = append(p.templateStack, inBodyIM)
|
||||
p.im = inBodyIM
|
||||
return false
|
||||
}
|
||||
case EndTagToken:
|
||||
switch p.tok.DataAtom {
|
||||
case a.Template:
|
||||
return inHeadIM(p)
|
||||
default:
|
||||
// Ignore the token.
|
||||
return true
|
||||
}
|
||||
}
|
||||
if !p.oe.contains(a.Template) {
|
||||
// Ignore the token.
|
||||
return true
|
||||
}
|
||||
p.popUntil(defaultScope, a.Template)
|
||||
p.clearActiveFormattingElements()
|
||||
p.templateStack.pop()
|
||||
p.resetInsertionMode()
|
||||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.6.4.19.
|
||||
func afterBodyIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case ErrorToken:
|
||||
|
|
@ -1688,7 +1891,7 @@ func afterBodyIM(p *parser) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.19.
|
||||
// Section 12.2.6.4.20.
|
||||
func inFramesetIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case CommentToken:
|
||||
|
|
@ -1720,6 +1923,11 @@ func inFramesetIM(p *parser) bool {
|
|||
p.acknowledgeSelfClosingTag()
|
||||
case a.Noframes:
|
||||
return inHeadIM(p)
|
||||
case a.Template:
|
||||
// TODO: remove this divergence from the HTML5 spec.
|
||||
//
|
||||
// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
|
||||
return inTemplateIM(p)
|
||||
}
|
||||
case EndTagToken:
|
||||
switch p.tok.DataAtom {
|
||||
|
|
@ -1738,7 +1946,7 @@ func inFramesetIM(p *parser) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.20.
|
||||
// Section 12.2.6.4.21.
|
||||
func afterFramesetIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case CommentToken:
|
||||
|
|
@ -1777,7 +1985,7 @@ func afterFramesetIM(p *parser) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.21.
|
||||
// Section 12.2.6.4.22.
|
||||
func afterAfterBodyIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case ErrorToken:
|
||||
|
|
@ -1806,7 +2014,7 @@ func afterAfterBodyIM(p *parser) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Section 12.2.5.4.22.
|
||||
// Section 12.2.6.4.23.
|
||||
func afterAfterFramesetIM(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case CommentToken:
|
||||
|
|
@ -1844,7 +2052,7 @@ func afterAfterFramesetIM(p *parser) bool {
|
|||
|
||||
const whitespaceOrNUL = whitespace + "\x00"
|
||||
|
||||
// Section 12.2.5.5.
|
||||
// Section 12.2.6.5
|
||||
func parseForeignContent(p *parser) bool {
|
||||
switch p.tok.Type {
|
||||
case TextToken:
|
||||
|
|
@ -1924,7 +2132,7 @@ func parseForeignContent(p *parser) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// Section 12.2.5.
|
||||
// Section 12.2.6.
|
||||
func (p *parser) inForeignContent() bool {
|
||||
if len(p.oe) == 0 {
|
||||
return false
|
||||
|
|
@ -2064,6 +2272,9 @@ func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
|
|||
}
|
||||
p.doc.AppendChild(root)
|
||||
p.oe = nodeStack{root}
|
||||
if context != nil && context.DataAtom == a.Template {
|
||||
p.templateStack = append(p.templateStack, inTemplateIM)
|
||||
}
|
||||
p.resetInsertionMode()
|
||||
|
||||
for n := context; n != nil; n = n.Parent {
|
||||
|
|
|
|||
388
vendor/golang.org/x/net/html/parse_test.go
generated
vendored
388
vendor/golang.org/x/net/html/parse_test.go
generated
vendored
|
|
@ -1,388 +0,0 @@
|
|||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// readParseTest reads a single test case from r.
|
||||
func readParseTest(r *bufio.Reader) (text, want, context string, err error) {
|
||||
line, err := r.ReadSlice('\n')
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
var b []byte
|
||||
|
||||
// Read the HTML.
|
||||
if string(line) != "#data\n" {
|
||||
return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line)
|
||||
}
|
||||
for {
|
||||
line, err = r.ReadSlice('\n')
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
if line[0] == '#' {
|
||||
break
|
||||
}
|
||||
b = append(b, line...)
|
||||
}
|
||||
text = strings.TrimSuffix(string(b), "\n")
|
||||
b = b[:0]
|
||||
|
||||
// Skip the error list.
|
||||
if string(line) != "#errors\n" {
|
||||
return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line)
|
||||
}
|
||||
for {
|
||||
line, err = r.ReadSlice('\n')
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
if line[0] == '#' {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if string(line) == "#document-fragment\n" {
|
||||
line, err = r.ReadSlice('\n')
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
context = strings.TrimSpace(string(line))
|
||||
line, err = r.ReadSlice('\n')
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
}
|
||||
|
||||
// Read the dump of what the parse tree should be.
|
||||
if string(line) != "#document\n" {
|
||||
return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line)
|
||||
}
|
||||
inQuote := false
|
||||
for {
|
||||
line, err = r.ReadSlice('\n')
|
||||
if err != nil && err != io.EOF {
|
||||
return "", "", "", err
|
||||
}
|
||||
trimmed := bytes.Trim(line, "| \n")
|
||||
if len(trimmed) > 0 {
|
||||
if line[0] == '|' && trimmed[0] == '"' {
|
||||
inQuote = true
|
||||
}
|
||||
if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) {
|
||||
inQuote = false
|
||||
}
|
||||
}
|
||||
if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote {
|
||||
break
|
||||
}
|
||||
b = append(b, line...)
|
||||
}
|
||||
return text, string(b), context, nil
|
||||
}
|
||||
|
||||
func dumpIndent(w io.Writer, level int) {
|
||||
io.WriteString(w, "| ")
|
||||
for i := 0; i < level; i++ {
|
||||
io.WriteString(w, " ")
|
||||
}
|
||||
}
|
||||
|
||||
type sortedAttributes []Attribute
|
||||
|
||||
func (a sortedAttributes) Len() int {
|
||||
return len(a)
|
||||
}
|
||||
|
||||
func (a sortedAttributes) Less(i, j int) bool {
|
||||
if a[i].Namespace != a[j].Namespace {
|
||||
return a[i].Namespace < a[j].Namespace
|
||||
}
|
||||
return a[i].Key < a[j].Key
|
||||
}
|
||||
|
||||
func (a sortedAttributes) Swap(i, j int) {
|
||||
a[i], a[j] = a[j], a[i]
|
||||
}
|
||||
|
||||
func dumpLevel(w io.Writer, n *Node, level int) error {
|
||||
dumpIndent(w, level)
|
||||
switch n.Type {
|
||||
case ErrorNode:
|
||||
return errors.New("unexpected ErrorNode")
|
||||
case DocumentNode:
|
||||
return errors.New("unexpected DocumentNode")
|
||||
case ElementNode:
|
||||
if n.Namespace != "" {
|
||||
fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
|
||||
} else {
|
||||
fmt.Fprintf(w, "<%s>", n.Data)
|
||||
}
|
||||
attr := sortedAttributes(n.Attr)
|
||||
sort.Sort(attr)
|
||||
for _, a := range attr {
|
||||
io.WriteString(w, "\n")
|
||||
dumpIndent(w, level+1)
|
||||
if a.Namespace != "" {
|
||||
fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
|
||||
} else {
|
||||
fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
|
||||
}
|
||||
}
|
||||
case TextNode:
|
||||
fmt.Fprintf(w, `"%s"`, n.Data)
|
||||
case CommentNode:
|
||||
fmt.Fprintf(w, "<!-- %s -->", n.Data)
|
||||
case DoctypeNode:
|
||||
fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
|
||||
if n.Attr != nil {
|
||||
var p, s string
|
||||
for _, a := range n.Attr {
|
||||
switch a.Key {
|
||||
case "public":
|
||||
p = a.Val
|
||||
case "system":
|
||||
s = a.Val
|
||||
}
|
||||
}
|
||||
if p != "" || s != "" {
|
||||
fmt.Fprintf(w, ` "%s"`, p)
|
||||
fmt.Fprintf(w, ` "%s"`, s)
|
||||
}
|
||||
}
|
||||
io.WriteString(w, ">")
|
||||
case scopeMarkerNode:
|
||||
return errors.New("unexpected scopeMarkerNode")
|
||||
default:
|
||||
return errors.New("unknown node type")
|
||||
}
|
||||
io.WriteString(w, "\n")
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if err := dumpLevel(w, c, level+1); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func dump(n *Node) (string, error) {
|
||||
if n == nil || n.FirstChild == nil {
|
||||
return "", nil
|
||||
}
|
||||
var b bytes.Buffer
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
if err := dumpLevel(&b, c, 0); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
return b.String(), nil
|
||||
}
|
||||
|
||||
const testDataDir = "testdata/webkit/"
|
||||
|
||||
func TestParser(t *testing.T) {
|
||||
testFiles, err := filepath.Glob(testDataDir + "*.dat")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
for _, tf := range testFiles {
|
||||
f, err := os.Open(tf)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer f.Close()
|
||||
r := bufio.NewReader(f)
|
||||
|
||||
for i := 0; ; i++ {
|
||||
text, want, context, err := readParseTest(r)
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = testParseCase(text, want, context)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("%s test #%d %q, %s", tf, i, text, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// testParseCase tests one test case from the test files. If the test does not
|
||||
// pass, it returns an error that explains the failure.
|
||||
// text is the HTML to be parsed, want is a dump of the correct parse tree,
|
||||
// and context is the name of the context node, if any.
|
||||
func testParseCase(text, want, context string) (err error) {
|
||||
defer func() {
|
||||
if x := recover(); x != nil {
|
||||
switch e := x.(type) {
|
||||
case error:
|
||||
err = e
|
||||
default:
|
||||
err = fmt.Errorf("%v", e)
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
var doc *Node
|
||||
if context == "" {
|
||||
doc, err = Parse(strings.NewReader(text))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
contextNode := &Node{
|
||||
Type: ElementNode,
|
||||
DataAtom: atom.Lookup([]byte(context)),
|
||||
Data: context,
|
||||
}
|
||||
nodes, err := ParseFragment(strings.NewReader(text), contextNode)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
doc = &Node{
|
||||
Type: DocumentNode,
|
||||
}
|
||||
for _, n := range nodes {
|
||||
doc.AppendChild(n)
|
||||
}
|
||||
}
|
||||
|
||||
if err := checkTreeConsistency(doc); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
got, err := dump(doc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Compare the parsed tree to the #document section.
|
||||
if got != want {
|
||||
return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want)
|
||||
}
|
||||
|
||||
if renderTestBlacklist[text] || context != "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check that rendering and re-parsing results in an identical tree.
|
||||
pr, pw := io.Pipe()
|
||||
go func() {
|
||||
pw.CloseWithError(Render(pw, doc))
|
||||
}()
|
||||
doc1, err := Parse(pr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
got1, err := dump(doc1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if got != got1 {
|
||||
return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, got1)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Some test input result in parse trees are not 'well-formed' despite
|
||||
// following the HTML5 recovery algorithms. Rendering and re-parsing such a
|
||||
// tree will not result in an exact clone of that tree. We blacklist such
|
||||
// inputs from the render test.
|
||||
var renderTestBlacklist = map[string]bool{
|
||||
// The second <a> will be reparented to the first <table>'s parent. This
|
||||
// results in an <a> whose parent is an <a>, which is not 'well-formed'.
|
||||
`<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
|
||||
// The same thing with a <p>:
|
||||
`<p><table></p>`: true,
|
||||
// More cases of <a> being reparented:
|
||||
`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
|
||||
`<a><table><a></table><p><a><div><a>`: true,
|
||||
`<a><table><td><a><table></table><a></tr><a></table><a>`: true,
|
||||
// A similar reparenting situation involving <nobr>:
|
||||
`<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3`: true,
|
||||
// A <plaintext> element is reparented, putting it before a table.
|
||||
// A <plaintext> element can't have anything after it in HTML.
|
||||
`<table><plaintext><td>`: true,
|
||||
`<!doctype html><table><plaintext></plaintext>`: true,
|
||||
`<!doctype html><table><tbody><plaintext></plaintext>`: true,
|
||||
`<!doctype html><table><tbody><tr><plaintext></plaintext>`: true,
|
||||
// A form inside a table inside a form doesn't work either.
|
||||
`<!doctype html><form><table></form><form></table></form>`: true,
|
||||
// A script that ends at EOF may escape its own closing tag when rendered.
|
||||
`<!doctype html><script><!--<script `: true,
|
||||
`<!doctype html><script><!--<script <`: true,
|
||||
`<!doctype html><script><!--<script <a`: true,
|
||||
`<!doctype html><script><!--<script </`: true,
|
||||
`<!doctype html><script><!--<script </s`: true,
|
||||
`<!doctype html><script><!--<script </script`: true,
|
||||
`<!doctype html><script><!--<script </scripta`: true,
|
||||
`<!doctype html><script><!--<script -`: true,
|
||||
`<!doctype html><script><!--<script -a`: true,
|
||||
`<!doctype html><script><!--<script -<`: true,
|
||||
`<!doctype html><script><!--<script --`: true,
|
||||
`<!doctype html><script><!--<script --a`: true,
|
||||
`<!doctype html><script><!--<script --<`: true,
|
||||
`<script><!--<script `: true,
|
||||
`<script><!--<script <a`: true,
|
||||
`<script><!--<script </script`: true,
|
||||
`<script><!--<script </scripta`: true,
|
||||
`<script><!--<script -`: true,
|
||||
`<script><!--<script -a`: true,
|
||||
`<script><!--<script --`: true,
|
||||
`<script><!--<script --a`: true,
|
||||
`<script><!--<script <`: true,
|
||||
`<script><!--<script </`: true,
|
||||
`<script><!--<script </s`: true,
|
||||
// Reconstructing the active formatting elements results in a <plaintext>
|
||||
// element that contains an <a> element.
|
||||
`<!doctype html><p><a><plaintext>b`: true,
|
||||
}
|
||||
|
||||
func TestNodeConsistency(t *testing.T) {
|
||||
// inconsistentNode is a Node whose DataAtom and Data do not agree.
|
||||
inconsistentNode := &Node{
|
||||
Type: ElementNode,
|
||||
DataAtom: atom.Frameset,
|
||||
Data: "table",
|
||||
}
|
||||
_, err := ParseFragment(strings.NewReader("<p>hello</p>"), inconsistentNode)
|
||||
if err == nil {
|
||||
t.Errorf("got nil error, want non-nil")
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkParser(b *testing.B) {
|
||||
buf, err := ioutil.ReadFile("testdata/go1.html")
|
||||
if err != nil {
|
||||
b.Fatalf("could not read testdata/go1.html: %v", err)
|
||||
}
|
||||
b.SetBytes(int64(len(buf)))
|
||||
runtime.GC()
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
Parse(bytes.NewBuffer(buf))
|
||||
}
|
||||
}
|
||||
156
vendor/golang.org/x/net/html/render_test.go
generated
vendored
156
vendor/golang.org/x/net/html/render_test.go
generated
vendored
|
|
@ -1,156 +0,0 @@
|
|||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestRenderer(t *testing.T) {
|
||||
nodes := [...]*Node{
|
||||
0: {
|
||||
Type: ElementNode,
|
||||
Data: "html",
|
||||
},
|
||||
1: {
|
||||
Type: ElementNode,
|
||||
Data: "head",
|
||||
},
|
||||
2: {
|
||||
Type: ElementNode,
|
||||
Data: "body",
|
||||
},
|
||||
3: {
|
||||
Type: TextNode,
|
||||
Data: "0<1",
|
||||
},
|
||||
4: {
|
||||
Type: ElementNode,
|
||||
Data: "p",
|
||||
Attr: []Attribute{
|
||||
{
|
||||
Key: "id",
|
||||
Val: "A",
|
||||
},
|
||||
{
|
||||
Key: "foo",
|
||||
Val: `abc"def`,
|
||||
},
|
||||
},
|
||||
},
|
||||
5: {
|
||||
Type: TextNode,
|
||||
Data: "2",
|
||||
},
|
||||
6: {
|
||||
Type: ElementNode,
|
||||
Data: "b",
|
||||
Attr: []Attribute{
|
||||
{
|
||||
Key: "empty",
|
||||
Val: "",
|
||||
},
|
||||
},
|
||||
},
|
||||
7: {
|
||||
Type: TextNode,
|
||||
Data: "3",
|
||||
},
|
||||
8: {
|
||||
Type: ElementNode,
|
||||
Data: "i",
|
||||
Attr: []Attribute{
|
||||
{
|
||||
Key: "backslash",
|
||||
Val: `\`,
|
||||
},
|
||||
},
|
||||
},
|
||||
9: {
|
||||
Type: TextNode,
|
||||
Data: "&4",
|
||||
},
|
||||
10: {
|
||||
Type: TextNode,
|
||||
Data: "5",
|
||||
},
|
||||
11: {
|
||||
Type: ElementNode,
|
||||
Data: "blockquote",
|
||||
},
|
||||
12: {
|
||||
Type: ElementNode,
|
||||
Data: "br",
|
||||
},
|
||||
13: {
|
||||
Type: TextNode,
|
||||
Data: "6",
|
||||
},
|
||||
}
|
||||
|
||||
// Build a tree out of those nodes, based on a textual representation.
|
||||
// Only the ".\t"s are significant. The trailing HTML-like text is
|
||||
// just commentary. The "0:" prefixes are for easy cross-reference with
|
||||
// the nodes array.
|
||||
treeAsText := [...]string{
|
||||
0: `<html>`,
|
||||
1: `. <head>`,
|
||||
2: `. <body>`,
|
||||
3: `. . "0<1"`,
|
||||
4: `. . <p id="A" foo="abc"def">`,
|
||||
5: `. . . "2"`,
|
||||
6: `. . . <b empty="">`,
|
||||
7: `. . . . "3"`,
|
||||
8: `. . . <i backslash="\">`,
|
||||
9: `. . . . "&4"`,
|
||||
10: `. . "5"`,
|
||||
11: `. . <blockquote>`,
|
||||
12: `. . <br>`,
|
||||
13: `. . "6"`,
|
||||
}
|
||||
if len(nodes) != len(treeAsText) {
|
||||
t.Fatal("len(nodes) != len(treeAsText)")
|
||||
}
|
||||
var stack [8]*Node
|
||||
for i, line := range treeAsText {
|
||||
level := 0
|
||||
for line[0] == '.' {
|
||||
// Strip a leading ".\t".
|
||||
line = line[2:]
|
||||
level++
|
||||
}
|
||||
n := nodes[i]
|
||||
if level == 0 {
|
||||
if stack[0] != nil {
|
||||
t.Fatal("multiple root nodes")
|
||||
}
|
||||
stack[0] = n
|
||||
} else {
|
||||
stack[level-1].AppendChild(n)
|
||||
stack[level] = n
|
||||
for i := level + 1; i < len(stack); i++ {
|
||||
stack[i] = nil
|
||||
}
|
||||
}
|
||||
// At each stage of tree construction, we check all nodes for consistency.
|
||||
for j, m := range nodes {
|
||||
if err := checkNodeConsistency(m); err != nil {
|
||||
t.Fatalf("i=%d, j=%d: %v", i, j, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
want := `<html><head></head><body>0<1<p id="A" foo="abc"def">` +
|
||||
`2<b empty="">3</b><i backslash="\">&4</i></p>` +
|
||||
`5<blockquote></blockquote><br/>6</body></html>`
|
||||
b := new(bytes.Buffer)
|
||||
if err := Render(b, nodes[0]); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if got := b.String(); got != want {
|
||||
t.Errorf("got vs want:\n%s\n%s\n", got, want)
|
||||
}
|
||||
}
|
||||
4
vendor/golang.org/x/net/html/token.go
generated
vendored
4
vendor/golang.org/x/net/html/token.go
generated
vendored
|
|
@ -1161,8 +1161,8 @@ func (z *Tokenizer) TagAttr() (key, val []byte, moreAttr bool) {
|
|||
return nil, nil, false
|
||||
}
|
||||
|
||||
// Token returns the next Token. The result's Data and Attr values remain valid
|
||||
// after subsequent Next calls.
|
||||
// Token returns the current Token. The result's Data and Attr values remain
|
||||
// valid after subsequent Next calls.
|
||||
func (z *Tokenizer) Token() Token {
|
||||
t := Token{Type: z.tt}
|
||||
switch z.tt {
|
||||
|
|
|
|||
748
vendor/golang.org/x/net/html/token_test.go
generated
vendored
748
vendor/golang.org/x/net/html/token_test.go
generated
vendored
|
|
@ -1,748 +0,0 @@
|
|||
// Copyright 2010 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package html
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
type tokenTest struct {
|
||||
// A short description of the test case.
|
||||
desc string
|
||||
// The HTML to parse.
|
||||
html string
|
||||
// The string representations of the expected tokens, joined by '$'.
|
||||
golden string
|
||||
}
|
||||
|
||||
var tokenTests = []tokenTest{
|
||||
{
|
||||
"empty",
|
||||
"",
|
||||
"",
|
||||
},
|
||||
// A single text node. The tokenizer should not break text nodes on whitespace,
|
||||
// nor should it normalize whitespace within a text node.
|
||||
{
|
||||
"text",
|
||||
"foo bar",
|
||||
"foo bar",
|
||||
},
|
||||
// An entity.
|
||||
{
|
||||
"entity",
|
||||
"one < two",
|
||||
"one < two",
|
||||
},
|
||||
// A start, self-closing and end tag. The tokenizer does not care if the start
|
||||
// and end tokens don't match; that is the job of the parser.
|
||||
{
|
||||
"tags",
|
||||
"<a>b<c/>d</e>",
|
||||
"<a>$b$<c/>$d$</e>",
|
||||
},
|
||||
// Angle brackets that aren't a tag.
|
||||
{
|
||||
"not a tag #0",
|
||||
"<",
|
||||
"<",
|
||||
},
|
||||
{
|
||||
"not a tag #1",
|
||||
"</",
|
||||
"</",
|
||||
},
|
||||
{
|
||||
"not a tag #2",
|
||||
"</>",
|
||||
"<!---->",
|
||||
},
|
||||
{
|
||||
"not a tag #3",
|
||||
"a</>b",
|
||||
"a$<!---->$b",
|
||||
},
|
||||
{
|
||||
"not a tag #4",
|
||||
"</ >",
|
||||
"<!-- -->",
|
||||
},
|
||||
{
|
||||
"not a tag #5",
|
||||
"</.",
|
||||
"<!--.-->",
|
||||
},
|
||||
{
|
||||
"not a tag #6",
|
||||
"</.>",
|
||||
"<!--.-->",
|
||||
},
|
||||
{
|
||||
"not a tag #7",
|
||||
"a < b",
|
||||
"a < b",
|
||||
},
|
||||
{
|
||||
"not a tag #8",
|
||||
"<.>",
|
||||
"<.>",
|
||||
},
|
||||
{
|
||||
"not a tag #9",
|
||||
"a<<<b>>>c",
|
||||
"a<<$<b>$>>c",
|
||||
},
|
||||
{
|
||||
"not a tag #10",
|
||||
"if x<0 and y < 0 then x*y>0",
|
||||
"if x<0 and y < 0 then x*y>0",
|
||||
},
|
||||
{
|
||||
"not a tag #11",
|
||||
"<<p>",
|
||||
"<$<p>",
|
||||
},
|
||||
// EOF in a tag name.
|
||||
{
|
||||
"tag name eof #0",
|
||||
"<a",
|
||||
"",
|
||||
},
|
||||
{
|
||||
"tag name eof #1",
|
||||
"<a ",
|
||||
"",
|
||||
},
|
||||
{
|
||||
"tag name eof #2",
|
||||
"a<b",
|
||||
"a",
|
||||
},
|
||||
{
|
||||
"tag name eof #3",
|
||||
"<a><b",
|
||||
"<a>",
|
||||
},
|
||||
{
|
||||
"tag name eof #4",
|
||||
`<a x`,
|
||||
``,
|
||||
},
|
||||
// Some malformed tags that are missing a '>'.
|
||||
{
|
||||
"malformed tag #0",
|
||||
`<p</p>`,
|
||||
`<p< p="">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #1",
|
||||
`<p </p>`,
|
||||
`<p <="" p="">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #2",
|
||||
`<p id`,
|
||||
``,
|
||||
},
|
||||
{
|
||||
"malformed tag #3",
|
||||
`<p id=`,
|
||||
``,
|
||||
},
|
||||
{
|
||||
"malformed tag #4",
|
||||
`<p id=>`,
|
||||
`<p id="">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #5",
|
||||
`<p id=0`,
|
||||
``,
|
||||
},
|
||||
{
|
||||
"malformed tag #6",
|
||||
`<p id=0</p>`,
|
||||
`<p id="0</p">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #7",
|
||||
`<p id="0</p>`,
|
||||
``,
|
||||
},
|
||||
{
|
||||
"malformed tag #8",
|
||||
`<p id="0"</p>`,
|
||||
`<p id="0" <="" p="">`,
|
||||
},
|
||||
{
|
||||
"malformed tag #9",
|
||||
`<p></p id`,
|
||||
`<p>`,
|
||||
},
|
||||
// Raw text and RCDATA.
|
||||
{
|
||||
"basic raw text",
|
||||
"<script><a></b></script>",
|
||||
"<script>$<a></b>$</script>",
|
||||
},
|
||||
{
|
||||
"unfinished script end tag",
|
||||
"<SCRIPT>a</SCR",
|
||||
"<script>$a</SCR",
|
||||
},
|
||||
{
|
||||
"broken script end tag",
|
||||
"<SCRIPT>a</SCR ipt>",
|
||||
"<script>$a</SCR ipt>",
|
||||
},
|
||||
{
|
||||
"EOF in script end tag",
|
||||
"<SCRIPT>a</SCRipt",
|
||||
"<script>$a</SCRipt",
|
||||
},
|
||||
{
|
||||
"scriptx end tag",
|
||||
"<SCRIPT>a</SCRiptx",
|
||||
"<script>$a</SCRiptx",
|
||||
},
|
||||
{
|
||||
"' ' completes script end tag",
|
||||
"<SCRIPT>a</SCRipt ",
|
||||
"<script>$a",
|
||||
},
|
||||
{
|
||||
"'>' completes script end tag",
|
||||
"<SCRIPT>a</SCRipt>",
|
||||
"<script>$a$</script>",
|
||||
},
|
||||
{
|
||||
"self-closing script end tag",
|
||||
"<SCRIPT>a</SCRipt/>",
|
||||
"<script>$a$</script>",
|
||||
},
|
||||
{
|
||||
"nested script tag",
|
||||
"<SCRIPT>a</SCRipt<script>",
|
||||
"<script>$a</SCRipt<script>",
|
||||
},
|
||||
{
|
||||
"script end tag after unfinished",
|
||||
"<SCRIPT>a</SCRipt</script>",
|
||||
"<script>$a</SCRipt$</script>",
|
||||
},
|
||||
{
|
||||
"script/style mismatched tags",
|
||||
"<script>a</style>",
|
||||
"<script>$a</style>",
|
||||
},
|
||||
{
|
||||
"style element with entity",
|
||||
"<style>'",
|
||||
"<style>$&apos;",
|
||||
},
|
||||
{
|
||||
"textarea with tag",
|
||||
"<textarea><div></textarea>",
|
||||
"<textarea>$<div>$</textarea>",
|
||||
},
|
||||
{
|
||||
"title with tag and entity",
|
||||
"<title><b>K&R C</b></title>",
|
||||
"<title>$<b>K&R C</b>$</title>",
|
||||
},
|
||||
// DOCTYPE tests.
|
||||
{
|
||||
"Proper DOCTYPE",
|
||||
"<!DOCTYPE html>",
|
||||
"<!DOCTYPE html>",
|
||||
},
|
||||
{
|
||||
"DOCTYPE with no space",
|
||||
"<!doctypehtml>",
|
||||
"<!DOCTYPE html>",
|
||||
},
|
||||
{
|
||||
"DOCTYPE with two spaces",
|
||||
"<!doctype html>",
|
||||
"<!DOCTYPE html>",
|
||||
},
|
||||
{
|
||||
"looks like DOCTYPE but isn't",
|
||||
"<!DOCUMENT html>",
|
||||
"<!--DOCUMENT html-->",
|
||||
},
|
||||
{
|
||||
"DOCTYPE at EOF",
|
||||
"<!DOCtype",
|
||||
"<!DOCTYPE >",
|
||||
},
|
||||
// XML processing instructions.
|
||||
{
|
||||
"XML processing instruction",
|
||||
"<?xml?>",
|
||||
"<!--?xml?-->",
|
||||
},
|
||||
// Comments.
|
||||
{
|
||||
"comment0",
|
||||
"abc<b><!-- skipme --></b>def",
|
||||
"abc$<b>$<!-- skipme -->$</b>$def",
|
||||
},
|
||||
{
|
||||
"comment1",
|
||||
"a<!-->z",
|
||||
"a$<!---->$z",
|
||||
},
|
||||
{
|
||||
"comment2",
|
||||
"a<!--->z",
|
||||
"a$<!---->$z",
|
||||
},
|
||||
{
|
||||
"comment3",
|
||||
"a<!--x>-->z",
|
||||
"a$<!--x>-->$z",
|
||||
},
|
||||
{
|
||||
"comment4",
|
||||
"a<!--x->-->z",
|
||||
"a$<!--x->-->$z",
|
||||
},
|
||||
{
|
||||
"comment5",
|
||||
"a<!>z",
|
||||
"a$<!---->$z",
|
||||
},
|
||||
{
|
||||
"comment6",
|
||||
"a<!->z",
|
||||
"a$<!----->$z",
|
||||
},
|
||||
{
|
||||
"comment7",
|
||||
"a<!---<>z",
|
||||
"a$<!---<>z-->",
|
||||
},
|
||||
{
|
||||
"comment8",
|
||||
"a<!--z",
|
||||
"a$<!--z-->",
|
||||
},
|
||||
{
|
||||
"comment9",
|
||||
"a<!--z-",
|
||||
"a$<!--z-->",
|
||||
},
|
||||
{
|
||||
"comment10",
|
||||
"a<!--z--",
|
||||
"a$<!--z-->",
|
||||
},
|
||||
{
|
||||
"comment11",
|
||||
"a<!--z---",
|
||||
"a$<!--z--->",
|
||||
},
|
||||
{
|
||||
"comment12",
|
||||
"a<!--z----",
|
||||
"a$<!--z---->",
|
||||
},
|
||||
{
|
||||
"comment13",
|
||||
"a<!--x--!>z",
|
||||
"a$<!--x-->$z",
|
||||
},
|
||||
// An attribute with a backslash.
|
||||
{
|
||||
"backslash",
|
||||
`<p id="a\"b">`,
|
||||
`<p id="a\" b"="">`,
|
||||
},
|
||||
// Entities, tag name and attribute key lower-casing, and whitespace
|
||||
// normalization within a tag.
|
||||
{
|
||||
"tricky",
|
||||
"<p \t\n iD=\"a"B\" foo=\"bar\"><EM>te<&;xt</em></p>",
|
||||
`<p id="a"B" foo="bar">$<em>$te<&;xt$</em>$</p>`,
|
||||
},
|
||||
// A nonexistent entity. Tokenizing and converting back to a string should
|
||||
// escape the "&" to become "&".
|
||||
{
|
||||
"noSuchEntity",
|
||||
`<a b="c&noSuchEntity;d"><&alsoDoesntExist;&`,
|
||||
`<a b="c&noSuchEntity;d">$<&alsoDoesntExist;&`,
|
||||
},
|
||||
{
|
||||
"entity without semicolon",
|
||||
`¬it;∉<a b="q=z&=5¬ice=hello¬=world">`,
|
||||
`¬it;∉$<a b="q=z&amp=5&notice=hello¬=world">`,
|
||||
},
|
||||
{
|
||||
"entity with digits",
|
||||
"½",
|
||||
"½",
|
||||
},
|
||||
// Attribute tests:
|
||||
// http://dev.w3.org/html5/pf-summary/Overview.html#attributes
|
||||
{
|
||||
"Empty attribute",
|
||||
`<input disabled FOO>`,
|
||||
`<input disabled="" foo="">`,
|
||||
},
|
||||
{
|
||||
"Empty attribute, whitespace",
|
||||
`<input disabled FOO >`,
|
||||
`<input disabled="" foo="">`,
|
||||
},
|
||||
{
|
||||
"Unquoted attribute value",
|
||||
`<input value=yes FOO=BAR>`,
|
||||
`<input value="yes" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Unquoted attribute value, spaces",
|
||||
`<input value = yes FOO = BAR>`,
|
||||
`<input value="yes" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Unquoted attribute value, trailing space",
|
||||
`<input value=yes FOO=BAR >`,
|
||||
`<input value="yes" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Single-quoted attribute value",
|
||||
`<input value='yes' FOO='BAR'>`,
|
||||
`<input value="yes" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Single-quoted attribute value, trailing space",
|
||||
`<input value='yes' FOO='BAR' >`,
|
||||
`<input value="yes" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Double-quoted attribute value",
|
||||
`<input value="I'm an attribute" FOO="BAR">`,
|
||||
`<input value="I'm an attribute" foo="BAR">`,
|
||||
},
|
||||
{
|
||||
"Attribute name characters",
|
||||
`<meta http-equiv="content-type">`,
|
||||
`<meta http-equiv="content-type">`,
|
||||
},
|
||||
{
|
||||
"Mixed attributes",
|
||||
`a<P V="0 1" w='2' X=3 y>z`,
|
||||
`a$<p v="0 1" w="2" x="3" y="">$z`,
|
||||
},
|
||||
{
|
||||
"Attributes with a solitary single quote",
|
||||
`<p id=can't><p id=won't>`,
|
||||
`<p id="can't">$<p id="won't">`,
|
||||
},
|
||||
}
|
||||
|
||||
func TestTokenizer(t *testing.T) {
|
||||
loop:
|
||||
for _, tt := range tokenTests {
|
||||
z := NewTokenizer(strings.NewReader(tt.html))
|
||||
if tt.golden != "" {
|
||||
for i, s := range strings.Split(tt.golden, "$") {
|
||||
if z.Next() == ErrorToken {
|
||||
t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Err())
|
||||
continue loop
|
||||
}
|
||||
actual := z.Token().String()
|
||||
if s != actual {
|
||||
t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
|
||||
continue loop
|
||||
}
|
||||
}
|
||||
}
|
||||
z.Next()
|
||||
if z.Err() != io.EOF {
|
||||
t.Errorf("%s: want EOF got %q", tt.desc, z.Err())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMaxBuffer(t *testing.T) {
|
||||
// Exceeding the maximum buffer size generates ErrBufferExceeded.
|
||||
z := NewTokenizer(strings.NewReader("<" + strings.Repeat("t", 10)))
|
||||
z.SetMaxBuf(5)
|
||||
tt := z.Next()
|
||||
if got, want := tt, ErrorToken; got != want {
|
||||
t.Fatalf("token type: got: %v want: %v", got, want)
|
||||
}
|
||||
if got, want := z.Err(), ErrBufferExceeded; got != want {
|
||||
t.Errorf("error type: got: %v want: %v", got, want)
|
||||
}
|
||||
if got, want := string(z.Raw()), "<tttt"; got != want {
|
||||
t.Fatalf("buffered before overflow: got: %q want: %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMaxBufferReconstruction(t *testing.T) {
|
||||
// Exceeding the maximum buffer size at any point while tokenizing permits
|
||||
// reconstructing the original input.
|
||||
tests:
|
||||
for _, test := range tokenTests {
|
||||
for maxBuf := 1; ; maxBuf++ {
|
||||
r := strings.NewReader(test.html)
|
||||
z := NewTokenizer(r)
|
||||
z.SetMaxBuf(maxBuf)
|
||||
var tokenized bytes.Buffer
|
||||
for {
|
||||
tt := z.Next()
|
||||
tokenized.Write(z.Raw())
|
||||
if tt == ErrorToken {
|
||||
if err := z.Err(); err != io.EOF && err != ErrBufferExceeded {
|
||||
t.Errorf("%s: unexpected error: %v", test.desc, err)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
// Anything tokenized along with untokenized input or data left in the reader.
|
||||
assembled, err := ioutil.ReadAll(io.MultiReader(&tokenized, bytes.NewReader(z.Buffered()), r))
|
||||
if err != nil {
|
||||
t.Errorf("%s: ReadAll: %v", test.desc, err)
|
||||
continue tests
|
||||
}
|
||||
if got, want := string(assembled), test.html; got != want {
|
||||
t.Errorf("%s: reassembled html:\n got: %q\nwant: %q", test.desc, got, want)
|
||||
continue tests
|
||||
}
|
||||
// EOF indicates that we completed tokenization and hence found the max
|
||||
// maxBuf that generates ErrBufferExceeded, so continue to the next test.
|
||||
if z.Err() == io.EOF {
|
||||
break
|
||||
}
|
||||
} // buffer sizes
|
||||
} // tests
|
||||
}
|
||||
|
||||
func TestPassthrough(t *testing.T) {
|
||||
// Accumulating the raw output for each parse event should reconstruct the
|
||||
// original input.
|
||||
for _, test := range tokenTests {
|
||||
z := NewTokenizer(strings.NewReader(test.html))
|
||||
var parsed bytes.Buffer
|
||||
for {
|
||||
tt := z.Next()
|
||||
parsed.Write(z.Raw())
|
||||
if tt == ErrorToken {
|
||||
break
|
||||
}
|
||||
}
|
||||
if got, want := parsed.String(), test.html; got != want {
|
||||
t.Errorf("%s: parsed output:\n got: %q\nwant: %q", test.desc, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBufAPI(t *testing.T) {
|
||||
s := "0<a>1</a>2<b>3<a>4<a>5</a>6</b>7</a>8<a/>9"
|
||||
z := NewTokenizer(bytes.NewBufferString(s))
|
||||
var result bytes.Buffer
|
||||
depth := 0
|
||||
loop:
|
||||
for {
|
||||
tt := z.Next()
|
||||
switch tt {
|
||||
case ErrorToken:
|
||||
if z.Err() != io.EOF {
|
||||
t.Error(z.Err())
|
||||
}
|
||||
break loop
|
||||
case TextToken:
|
||||
if depth > 0 {
|
||||
result.Write(z.Text())
|
||||
}
|
||||
case StartTagToken, EndTagToken:
|
||||
tn, _ := z.TagName()
|
||||
if len(tn) == 1 && tn[0] == 'a' {
|
||||
if tt == StartTagToken {
|
||||
depth++
|
||||
} else {
|
||||
depth--
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
u := "14567"
|
||||
v := string(result.Bytes())
|
||||
if u != v {
|
||||
t.Errorf("TestBufAPI: want %q got %q", u, v)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConvertNewlines(t *testing.T) {
|
||||
testCases := map[string]string{
|
||||
"Mac\rDOS\r\nUnix\n": "Mac\nDOS\nUnix\n",
|
||||
"Unix\nMac\rDOS\r\n": "Unix\nMac\nDOS\n",
|
||||
"DOS\r\nDOS\r\nDOS\r\n": "DOS\nDOS\nDOS\n",
|
||||
"": "",
|
||||
"\n": "\n",
|
||||
"\n\r": "\n\n",
|
||||
"\r": "\n",
|
||||
"\r\n": "\n",
|
||||
"\r\n\n": "\n\n",
|
||||
"\r\n\r": "\n\n",
|
||||
"\r\n\r\n": "\n\n",
|
||||
"\r\r": "\n\n",
|
||||
"\r\r\n": "\n\n",
|
||||
"\r\r\n\n": "\n\n\n",
|
||||
"\r\r\r\n": "\n\n\n",
|
||||
"\r \n": "\n \n",
|
||||
"xyz": "xyz",
|
||||
}
|
||||
for in, want := range testCases {
|
||||
if got := string(convertNewlines([]byte(in))); got != want {
|
||||
t.Errorf("input %q: got %q, want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestReaderEdgeCases(t *testing.T) {
|
||||
const s = "<p>An io.Reader can return (0, nil) or (n, io.EOF).</p>"
|
||||
testCases := []io.Reader{
|
||||
&zeroOneByteReader{s: s},
|
||||
&eofStringsReader{s: s},
|
||||
&stuckReader{},
|
||||
}
|
||||
for i, tc := range testCases {
|
||||
got := []TokenType{}
|
||||
z := NewTokenizer(tc)
|
||||
for {
|
||||
tt := z.Next()
|
||||
if tt == ErrorToken {
|
||||
break
|
||||
}
|
||||
got = append(got, tt)
|
||||
}
|
||||
if err := z.Err(); err != nil && err != io.EOF {
|
||||
if err != io.ErrNoProgress {
|
||||
t.Errorf("i=%d: %v", i, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
want := []TokenType{
|
||||
StartTagToken,
|
||||
TextToken,
|
||||
EndTagToken,
|
||||
}
|
||||
if !reflect.DeepEqual(got, want) {
|
||||
t.Errorf("i=%d: got %v, want %v", i, got, want)
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// zeroOneByteReader is like a strings.Reader that alternates between
|
||||
// returning 0 bytes and 1 byte at a time.
|
||||
type zeroOneByteReader struct {
|
||||
s string
|
||||
n int
|
||||
}
|
||||
|
||||
func (r *zeroOneByteReader) Read(p []byte) (int, error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
if len(r.s) == 0 {
|
||||
return 0, io.EOF
|
||||
}
|
||||
r.n++
|
||||
if r.n%2 != 0 {
|
||||
return 0, nil
|
||||
}
|
||||
p[0], r.s = r.s[0], r.s[1:]
|
||||
return 1, nil
|
||||
}
|
||||
|
||||
// eofStringsReader is like a strings.Reader but can return an (n, err) where
|
||||
// n > 0 && err != nil.
|
||||
type eofStringsReader struct {
|
||||
s string
|
||||
}
|
||||
|
||||
func (r *eofStringsReader) Read(p []byte) (int, error) {
|
||||
n := copy(p, r.s)
|
||||
r.s = r.s[n:]
|
||||
if r.s != "" {
|
||||
return n, nil
|
||||
}
|
||||
return n, io.EOF
|
||||
}
|
||||
|
||||
// stuckReader is an io.Reader that always returns no data and no error.
|
||||
type stuckReader struct{}
|
||||
|
||||
func (*stuckReader) Read(p []byte) (int, error) {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
const (
|
||||
rawLevel = iota
|
||||
lowLevel
|
||||
highLevel
|
||||
)
|
||||
|
||||
func benchmarkTokenizer(b *testing.B, level int) {
|
||||
buf, err := ioutil.ReadFile("testdata/go1.html")
|
||||
if err != nil {
|
||||
b.Fatalf("could not read testdata/go1.html: %v", err)
|
||||
}
|
||||
b.SetBytes(int64(len(buf)))
|
||||
runtime.GC()
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
z := NewTokenizer(bytes.NewBuffer(buf))
|
||||
for {
|
||||
tt := z.Next()
|
||||
if tt == ErrorToken {
|
||||
if err := z.Err(); err != nil && err != io.EOF {
|
||||
b.Fatalf("tokenizer error: %v", err)
|
||||
}
|
||||
break
|
||||
}
|
||||
switch level {
|
||||
case rawLevel:
|
||||
// Calling z.Raw just returns the raw bytes of the token. It does
|
||||
// not unescape < to <, or lower-case tag names and attribute keys.
|
||||
z.Raw()
|
||||
case lowLevel:
|
||||
// Caling z.Text, z.TagName and z.TagAttr returns []byte values
|
||||
// whose contents may change on the next call to z.Next.
|
||||
switch tt {
|
||||
case TextToken, CommentToken, DoctypeToken:
|
||||
z.Text()
|
||||
case StartTagToken, SelfClosingTagToken:
|
||||
_, more := z.TagName()
|
||||
for more {
|
||||
_, _, more = z.TagAttr()
|
||||
}
|
||||
case EndTagToken:
|
||||
z.TagName()
|
||||
}
|
||||
case highLevel:
|
||||
// Calling z.Token converts []byte values to strings whose validity
|
||||
// extend beyond the next call to z.Next.
|
||||
z.Token()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkRawLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, rawLevel) }
|
||||
func BenchmarkLowLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, lowLevel) }
|
||||
func BenchmarkHighLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, highLevel) }
|
||||
Loading…
Add table
Add a link
Reference in a new issue