mirror of
https://github.com/rembo10/headphones.git
synced 2026-05-21 02:55:31 +01:00
Include html5lib for BeautifulSoup
BeautifulSoup needs lxml or html5, have included html5lib. Also latest BeautifulSoup 4.1.3
This commit is contained in:
75
html5lib/tests/testdata/tokenizer/contentModelFlags.test
vendored
Normal file
75
html5lib/tests/testdata/tokenizer/contentModelFlags.test
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
{"tests": [
|
||||
|
||||
{"description":"PLAINTEXT content model flag",
|
||||
"initialStates":["PLAINTEXT state"],
|
||||
"lastStartTag":"plaintext",
|
||||
"input":"<head>&body;",
|
||||
"output":[["Character", "<head>&body;"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp>",
|
||||
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xMp>",
|
||||
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp ",
|
||||
"output":[["Character", "foo"], "ParseError"]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp",
|
||||
"output":[["Character", "foo</xmp"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp/",
|
||||
"output":[["Character", "foo"], "ParseError"]},
|
||||
|
||||
{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp<",
|
||||
"output":[["Character", "foo</xmp<"]]},
|
||||
|
||||
{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</foo>bar</xmp>",
|
||||
"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</foo>bar</xmpaar>",
|
||||
"output":[["Character", "</foo>bar</xmpaar>"]]},
|
||||
|
||||
{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo</xmp></baz>",
|
||||
"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
|
||||
|
||||
{"description":"RAWTEXT w/ something looking like an entity",
|
||||
"initialStates":["RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"&foo;",
|
||||
"output":[["Character", "&foo;"]]},
|
||||
|
||||
{"description":"RCDATA w/ an entity",
|
||||
"initialStates":["RCDATA state"],
|
||||
"lastStartTag":"textarea",
|
||||
"input":"<",
|
||||
"output":[["Character", "<"]]}
|
||||
|
||||
]}
|
||||
90
html5lib/tests/testdata/tokenizer/domjs.test
vendored
Normal file
90
html5lib/tests/testdata/tokenizer/domjs.test
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
{
|
||||
"tests": [
|
||||
{
|
||||
"description":"CR in bogus comment state",
|
||||
"input":"<?\u000d",
|
||||
"output":["ParseError", ["Comment", "?\u000a"]]
|
||||
},
|
||||
{
|
||||
"description":"CRLF in bogus comment state",
|
||||
"input":"<?\u000d\u000a",
|
||||
"output":["ParseError", ["Comment", "?\u000a"]]
|
||||
},
|
||||
{
|
||||
"description":"NUL in RCDATA and RAWTEXT",
|
||||
"doubleEscaped":true,
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"input":"\\u0000",
|
||||
"output":["ParseError", ["Character", "\\uFFFD"]]
|
||||
},
|
||||
{
|
||||
"description":"skip first BOM but not later ones",
|
||||
"input":"\uFEFFfoo\uFEFFbar",
|
||||
"output":[["Character", "foo\uFEFFbar"]]
|
||||
},
|
||||
{
|
||||
"description":"Non BMP-charref in in RCDATA",
|
||||
"initialStates":["RCDATA state"],
|
||||
"input":"≂̸",
|
||||
"output":[["Character", "\u2242\u0338"]]
|
||||
},
|
||||
{
|
||||
"description":"Bad charref in in RCDATA",
|
||||
"initialStates":["RCDATA state"],
|
||||
"input":"&NotEqualTild;",
|
||||
"output":["ParseError", ["Character", "&NotEqualTild;"]]
|
||||
},
|
||||
{
|
||||
"description":"lowercase endtags in RCDATA and RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</XMP>",
|
||||
"output":[["EndTag","xmp"]]
|
||||
},
|
||||
{
|
||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</ XMP>",
|
||||
"output":[["Character","</ XMP>"]]
|
||||
},
|
||||
{
|
||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</xm>",
|
||||
"output":[["Character","</xm>"]]
|
||||
},
|
||||
{
|
||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</xm ",
|
||||
"output":[["Character","</xm "]]
|
||||
},
|
||||
{
|
||||
"description":"bad endtag in RCDATA and RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"</xm/",
|
||||
"output":[["Character","</xm/"]]
|
||||
},
|
||||
{
|
||||
"description":"Non BMP-charref in attribute",
|
||||
"input":"<p id=\"≂̸\">",
|
||||
"output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
|
||||
},
|
||||
{
|
||||
"description":"--!NUL in comment ",
|
||||
"doubleEscaped":true,
|
||||
"input":"<!----!\\u0000-->",
|
||||
"output":["ParseError", ["Comment", "--!\\uFFFD"]]
|
||||
},
|
||||
{
|
||||
"description":"space EOF after doctype ",
|
||||
"input":"<!DOCTYPE html ",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null , false]]
|
||||
}
|
||||
|
||||
]
|
||||
}
|
||||
283
html5lib/tests/testdata/tokenizer/entities.test
vendored
Normal file
283
html5lib/tests/testdata/tokenizer/entities.test
vendored
Normal file
@@ -0,0 +1,283 @@
|
||||
{"tests": [
|
||||
|
||||
{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
|
||||
"input":"<h a='¬i;'>",
|
||||
"output": ["ParseError", ["StartTag", "h", {"a": "¬i;"}]]},
|
||||
|
||||
{"description": "Entity name followed by the equals sign in an attribute value.",
|
||||
"input":"<h a='&lang='>",
|
||||
"output": ["ParseError", ["StartTag", "h", {"a": "&lang="}]]},
|
||||
|
||||
{"description": "CR as numeric entity",
|
||||
"input":"
",
|
||||
"output": ["ParseError", ["Character", "\r"]]},
|
||||
|
||||
{"description": "CR as hexadecimal numeric entity",
|
||||
"input":"
",
|
||||
"output": ["ParseError", ["Character", "\r"]]},
|
||||
|
||||
{"description": "Windows-1252 EURO SIGN numeric entity.",
|
||||
"input":"€",
|
||||
"output": ["ParseError", ["Character", "\u20AC"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u0081"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
|
||||
"input":"‚",
|
||||
"output": ["ParseError", ["Character", "\u201A"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
|
||||
"input":"ƒ",
|
||||
"output": ["ParseError", ["Character", "\u0192"]]},
|
||||
|
||||
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
|
||||
"input":"„",
|
||||
"output": ["ParseError", ["Character", "\u201E"]]},
|
||||
|
||||
{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
|
||||
"input":"…",
|
||||
"output": ["ParseError", ["Character", "\u2026"]]},
|
||||
|
||||
{"description": "Windows-1252 DAGGER numeric entity.",
|
||||
"input":"†",
|
||||
"output": ["ParseError", ["Character", "\u2020"]]},
|
||||
|
||||
{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
|
||||
"input":"‡",
|
||||
"output": ["ParseError", ["Character", "\u2021"]]},
|
||||
|
||||
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
|
||||
"input":"ˆ",
|
||||
"output": ["ParseError", ["Character", "\u02C6"]]},
|
||||
|
||||
{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
|
||||
"input":"‰",
|
||||
"output": ["ParseError", ["Character", "\u2030"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
|
||||
"input":"Š",
|
||||
"output": ["ParseError", ["Character", "\u0160"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
|
||||
"input":"‹",
|
||||
"output": ["ParseError", ["Character", "\u2039"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
|
||||
"input":"Œ",
|
||||
"output": ["ParseError", ["Character", "\u0152"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u008D"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
|
||||
"input":"Ž",
|
||||
"output": ["ParseError", ["Character", "\u017D"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u008F"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u0090"]]},
|
||||
|
||||
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
|
||||
"input":"‘",
|
||||
"output": ["ParseError", ["Character", "\u2018"]]},
|
||||
|
||||
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
|
||||
"input":"’",
|
||||
"output": ["ParseError", ["Character", "\u2019"]]},
|
||||
|
||||
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
|
||||
"input":"“",
|
||||
"output": ["ParseError", ["Character", "\u201C"]]},
|
||||
|
||||
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
|
||||
"input":"”",
|
||||
"output": ["ParseError", ["Character", "\u201D"]]},
|
||||
|
||||
{"description": "Windows-1252 BULLET numeric entity.",
|
||||
"input":"•",
|
||||
"output": ["ParseError", ["Character", "\u2022"]]},
|
||||
|
||||
{"description": "Windows-1252 EN DASH numeric entity.",
|
||||
"input":"–",
|
||||
"output": ["ParseError", ["Character", "\u2013"]]},
|
||||
|
||||
{"description": "Windows-1252 EM DASH numeric entity.",
|
||||
"input":"—",
|
||||
"output": ["ParseError", ["Character", "\u2014"]]},
|
||||
|
||||
{"description": "Windows-1252 SMALL TILDE numeric entity.",
|
||||
"input":"˜",
|
||||
"output": ["ParseError", ["Character", "\u02DC"]]},
|
||||
|
||||
{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
|
||||
"input":"™",
|
||||
"output": ["ParseError", ["Character", "\u2122"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
|
||||
"input":"š",
|
||||
"output": ["ParseError", ["Character", "\u0161"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
|
||||
"input":"›",
|
||||
"output": ["ParseError", ["Character", "\u203A"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
|
||||
"input":"œ",
|
||||
"output": ["ParseError", ["Character", "\u0153"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u009D"]]},
|
||||
|
||||
{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
|
||||
"input":"€",
|
||||
"output": ["ParseError", ["Character", "\u20AC"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u0081"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"‚",
|
||||
"output": ["ParseError", ["Character", "\u201A"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
|
||||
"input":"ƒ",
|
||||
"output": ["ParseError", ["Character", "\u0192"]]},
|
||||
|
||||
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"„",
|
||||
"output": ["ParseError", ["Character", "\u201E"]]},
|
||||
|
||||
{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
|
||||
"input":"…",
|
||||
"output": ["ParseError", ["Character", "\u2026"]]},
|
||||
|
||||
{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
|
||||
"input":"†",
|
||||
"output": ["ParseError", ["Character", "\u2020"]]},
|
||||
|
||||
{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
|
||||
"input":"‡",
|
||||
"output": ["ParseError", ["Character", "\u2021"]]},
|
||||
|
||||
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
|
||||
"input":"ˆ",
|
||||
"output": ["ParseError", ["Character", "\u02C6"]]},
|
||||
|
||||
{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
|
||||
"input":"‰",
|
||||
"output": ["ParseError", ["Character", "\u2030"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
|
||||
"input":"Š",
|
||||
"output": ["ParseError", ["Character", "\u0160"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"‹",
|
||||
"output": ["ParseError", ["Character", "\u2039"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
|
||||
"input":"Œ",
|
||||
"output": ["ParseError", ["Character", "\u0152"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u008D"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
|
||||
"input":"Ž",
|
||||
"output": ["ParseError", ["Character", "\u017D"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u008F"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u0090"]]},
|
||||
|
||||
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"‘",
|
||||
"output": ["ParseError", ["Character", "\u2018"]]},
|
||||
|
||||
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"’",
|
||||
"output": ["ParseError", ["Character", "\u2019"]]},
|
||||
|
||||
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"“",
|
||||
"output": ["ParseError", ["Character", "\u201C"]]},
|
||||
|
||||
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"”",
|
||||
"output": ["ParseError", ["Character", "\u201D"]]},
|
||||
|
||||
{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
|
||||
"input":"•",
|
||||
"output": ["ParseError", ["Character", "\u2022"]]},
|
||||
|
||||
{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
|
||||
"input":"–",
|
||||
"output": ["ParseError", ["Character", "\u2013"]]},
|
||||
|
||||
{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
|
||||
"input":"—",
|
||||
"output": ["ParseError", ["Character", "\u2014"]]},
|
||||
|
||||
{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
|
||||
"input":"˜",
|
||||
"output": ["ParseError", ["Character", "\u02DC"]]},
|
||||
|
||||
{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
|
||||
"input":"™",
|
||||
"output": ["ParseError", ["Character", "\u2122"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
|
||||
"input":"š",
|
||||
"output": ["ParseError", ["Character", "\u0161"]]},
|
||||
|
||||
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
|
||||
"input":"›",
|
||||
"output": ["ParseError", ["Character", "\u203A"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
|
||||
"input":"œ",
|
||||
"output": ["ParseError", ["Character", "\u0153"]]},
|
||||
|
||||
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
|
||||
"input":"",
|
||||
"output": ["ParseError", ["Character", "\u009D"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
|
||||
"input":"ž",
|
||||
"output": ["ParseError", ["Character", "\u017E"]]},
|
||||
|
||||
{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
|
||||
"input":"Ÿ",
|
||||
"output": ["ParseError", ["Character", "\u0178"]]},
|
||||
|
||||
{"description": "Decimal numeric entity followed by hex character a.",
|
||||
"input":"aa",
|
||||
"output": ["ParseError", ["Character", "aa"]]},
|
||||
|
||||
{"description": "Decimal numeric entity followed by hex character A.",
|
||||
"input":"aA",
|
||||
"output": ["ParseError", ["Character", "aA"]]},
|
||||
|
||||
{"description": "Decimal numeric entity followed by hex character f.",
|
||||
"input":"af",
|
||||
"output": ["ParseError", ["Character", "af"]]},
|
||||
|
||||
{"description": "Decimal numeric entity followed by hex character A.",
|
||||
"input":"aF",
|
||||
"output": ["ParseError", ["Character", "aF"]]}
|
||||
|
||||
]}
|
||||
33
html5lib/tests/testdata/tokenizer/escapeFlag.test
vendored
Normal file
33
html5lib/tests/testdata/tokenizer/escapeFlag.test
vendored
Normal file
@@ -0,0 +1,33 @@
|
||||
{"tests": [
|
||||
|
||||
{"description":"Commented close tag in RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo<!--</xmp>--></xmp>",
|
||||
"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"Bogus comment in RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo<!-->baz</xmp>",
|
||||
"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo<!--></xmp><!-->baz</xmp>",
|
||||
"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"Commented entities in RCDATA",
|
||||
"initialStates":["RCDATA state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":" & <!-- & --> & </xmp>",
|
||||
"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},
|
||||
|
||||
{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
|
||||
"initialStates":["RCDATA state", "RAWTEXT state"],
|
||||
"lastStartTag":"xmp",
|
||||
"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
|
||||
"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}
|
||||
|
||||
]}
|
||||
44189
html5lib/tests/testdata/tokenizer/namedEntities.test
vendored
Normal file
44189
html5lib/tests/testdata/tokenizer/namedEntities.test
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1313
html5lib/tests/testdata/tokenizer/numericEntities.test
vendored
Normal file
1313
html5lib/tests/testdata/tokenizer/numericEntities.test
vendored
Normal file
File diff suppressed because it is too large
Load Diff
7
html5lib/tests/testdata/tokenizer/pendingSpecChanges.test
vendored
Normal file
7
html5lib/tests/testdata/tokenizer/pendingSpecChanges.test
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
{"tests": [
|
||||
|
||||
{"description":"<!---- >",
|
||||
"input":"<!---- >",
|
||||
"output":["ParseError", "ParseError", ["Comment","-- >"]]}
|
||||
|
||||
]}
|
||||
196
html5lib/tests/testdata/tokenizer/test1.test
vendored
Normal file
196
html5lib/tests/testdata/tokenizer/test1.test
vendored
Normal file
@@ -0,0 +1,196 @@
|
||||
{"tests": [
|
||||
|
||||
{"description":"Correct Doctype lowercase",
|
||||
"input":"<!DOCTYPE html>",
|
||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Correct Doctype uppercase",
|
||||
"input":"<!DOCTYPE HTML>",
|
||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Correct Doctype mixed case",
|
||||
"input":"<!DOCTYPE HtMl>",
|
||||
"output":[["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Correct Doctype case with EOF",
|
||||
"input":"<!DOCTYPE HtMl",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Truncated doctype start",
|
||||
"input":"<!DOC>",
|
||||
"output":["ParseError", ["Comment", "DOC"]]},
|
||||
|
||||
{"description":"Doctype in error",
|
||||
"input":"<!DOCTYPE foo>",
|
||||
"output":[["DOCTYPE", "foo", null, null, true]]},
|
||||
|
||||
{"description":"Single Start Tag",
|
||||
"input":"<h>",
|
||||
"output":[["StartTag", "h", {}]]},
|
||||
|
||||
{"description":"Empty end tag",
|
||||
"input":"</>",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"Empty start tag",
|
||||
"input":"<>",
|
||||
"output":["ParseError", ["Character", "<>"]]},
|
||||
|
||||
{"description":"Start Tag w/attribute",
|
||||
"input":"<h a='b'>",
|
||||
"output":[["StartTag", "h", {"a":"b"}]]},
|
||||
|
||||
{"description":"Start Tag w/attribute no quotes",
|
||||
"input":"<h a=b>",
|
||||
"output":[["StartTag", "h", {"a":"b"}]]},
|
||||
|
||||
{"description":"Start/End Tag",
|
||||
"input":"<h></h>",
|
||||
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
|
||||
|
||||
{"description":"Two unclosed start tags",
|
||||
"input":"<p>One<p>Two",
|
||||
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
|
||||
|
||||
{"description":"End Tag w/attribute",
|
||||
"input":"<h></h a='b'>",
|
||||
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
|
||||
|
||||
{"description":"Multiple atts",
|
||||
"input":"<h a='b' c='d'>",
|
||||
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
|
||||
|
||||
{"description":"Multiple atts no space",
|
||||
"input":"<h a='b'c='d'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]},
|
||||
|
||||
{"description":"Repeated attr",
|
||||
"input":"<h a='b' a='d'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
|
||||
|
||||
{"description":"Simple comment",
|
||||
"input":"<!--comment-->",
|
||||
"output":[["Comment", "comment"]]},
|
||||
|
||||
{"description":"Comment, Central dash no space",
|
||||
"input":"<!----->",
|
||||
"output":["ParseError", ["Comment", "-"]]},
|
||||
|
||||
{"description":"Comment, two central dashes",
|
||||
"input":"<!-- --comment -->",
|
||||
"output":["ParseError", ["Comment", " --comment "]]},
|
||||
|
||||
{"description":"Unfinished comment",
|
||||
"input":"<!--comment",
|
||||
"output":["ParseError", ["Comment", "comment"]]},
|
||||
|
||||
{"description":"Start of a comment",
|
||||
"input":"<!-",
|
||||
"output":["ParseError", ["Comment", "-"]]},
|
||||
|
||||
{"description":"Short comment",
|
||||
"input":"<!-->",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"Short comment two",
|
||||
"input":"<!--->",
|
||||
"output":["ParseError", ["Comment", ""]]},
|
||||
|
||||
{"description":"Short comment three",
|
||||
"input":"<!---->",
|
||||
"output":[["Comment", ""]]},
|
||||
|
||||
|
||||
{"description":"Ampersand EOF",
|
||||
"input":"&",
|
||||
"output":[["Character", "&"]]},
|
||||
|
||||
{"description":"Ampersand ampersand EOF",
|
||||
"input":"&&",
|
||||
"output":[["Character", "&&"]]},
|
||||
|
||||
{"description":"Ampersand space EOF",
|
||||
"input":"& ",
|
||||
"output":[["Character", "& "]]},
|
||||
|
||||
{"description":"Unfinished entity",
|
||||
"input":"&f",
|
||||
"output":["ParseError", ["Character", "&f"]]},
|
||||
|
||||
{"description":"Ampersand, number sign",
|
||||
"input":"&#",
|
||||
"output":["ParseError", ["Character", "&#"]]},
|
||||
|
||||
{"description":"Unfinished numeric entity",
|
||||
"input":"&#x",
|
||||
"output":["ParseError", ["Character", "&#x"]]},
|
||||
|
||||
{"description":"Entity with trailing semicolon (1)",
|
||||
"input":"I'm ¬it",
|
||||
"output":[["Character","I'm \u00ACit"]]},
|
||||
|
||||
{"description":"Entity with trailing semicolon (2)",
|
||||
"input":"I'm ∉",
|
||||
"output":[["Character","I'm \u2209"]]},
|
||||
|
||||
{"description":"Entity without trailing semicolon (1)",
|
||||
"input":"I'm ¬it",
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]},
|
||||
|
||||
{"description":"Entity without trailing semicolon (2)",
|
||||
"input":"I'm ¬in",
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]},
|
||||
|
||||
{"description":"Partial entity match at end of file",
|
||||
"input":"I'm &no",
|
||||
"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
|
||||
|
||||
{"description":"Non-ASCII character reference name",
|
||||
"input":"&\u00AC;",
|
||||
"output":["ParseError", ["Character", "&\u00AC;"]]},
|
||||
|
||||
{"description":"ASCII decimal entity",
|
||||
"input":"$",
|
||||
"output":[["Character","$"]]},
|
||||
|
||||
{"description":"ASCII hexadecimal entity",
|
||||
"input":"?",
|
||||
"output":[["Character","?"]]},
|
||||
|
||||
{"description":"Hexadecimal entity in attribute",
|
||||
"input":"<h a='?'></h>",
|
||||
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon ending in x",
|
||||
"input":"<h a='¬x'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"¬x"}]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon ending in 1",
|
||||
"input":"<h a='¬1'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"¬1"}]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon ending in i",
|
||||
"input":"<h a='¬i'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"¬i"}]]},
|
||||
|
||||
{"description":"Entity in attribute without semicolon",
|
||||
"input":"<h a='©'>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]},
|
||||
|
||||
{"description":"Unquoted attribute ending in ampersand",
|
||||
"input":"<s o=& t>",
|
||||
"output":[["StartTag","s",{"o":"&","t":""}]]},
|
||||
|
||||
{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
|
||||
"input":"<a a=a&>foo",
|
||||
"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
|
||||
|
||||
{"description":"plaintext element",
|
||||
"input":"<plaintext>foobar",
|
||||
"output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
|
||||
|
||||
{"description":"Open angled bracket in unquoted attribute value state",
|
||||
"input":"<a a=f<>",
|
||||
"output":["ParseError", ["StartTag", "a", {"a":"f<"}]]}
|
||||
|
||||
]}
|
||||
179
html5lib/tests/testdata/tokenizer/test2.test
vendored
Normal file
179
html5lib/tests/testdata/tokenizer/test2.test
vendored
Normal file
@@ -0,0 +1,179 @@
|
||||
{"tests": [
|
||||
|
||||
{"description":"DOCTYPE without name",
|
||||
"input":"<!DOCTYPE>",
|
||||
"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
|
||||
|
||||
{"description":"DOCTYPE without space before name",
|
||||
"input":"<!DOCTYPEhtml>",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
|
||||
|
||||
{"description":"Incorrect DOCTYPE without a space before name",
|
||||
"input":"<!DOCTYPEfoo>",
|
||||
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
|
||||
|
||||
{"description":"DOCTYPE with publicId",
|
||||
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC",
|
||||
"input":"<!DOCTYPE html PUBLIC",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC '",
|
||||
"input":"<!DOCTYPE html PUBLIC '",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with EOF after PUBLIC 'x",
|
||||
"input":"<!DOCTYPE html PUBLIC 'x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
|
||||
|
||||
{"description":"DOCTYPE with systemId",
|
||||
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
||||
|
||||
{"description":"DOCTYPE with publicId and systemId",
|
||||
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
|
||||
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
|
||||
|
||||
{"description":"DOCTYPE with > in double-quoted publicId",
|
||||
"input":"<!DOCTYPE html PUBLIC \">x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
|
||||
|
||||
{"description":"DOCTYPE with > in single-quoted publicId",
|
||||
"input":"<!DOCTYPE html PUBLIC '>x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
|
||||
|
||||
{"description":"DOCTYPE with > in double-quoted systemId",
|
||||
"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
|
||||
|
||||
{"description":"DOCTYPE with > in single-quoted systemId",
|
||||
"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
|
||||
"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
|
||||
|
||||
{"description":"Incomplete doctype",
|
||||
"input":"<!DOCTYPE html ",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Numeric entity representing the NUL character",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing the NUL character",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity pair representing a surrogate pair",
|
||||
"input":"��",
|
||||
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
|
||||
"input":"ꯍ",
|
||||
"output":[["Character", "\uABCD"]]},
|
||||
|
||||
{"description":"Entity without a name",
|
||||
"input":"&;",
|
||||
"output":["ParseError", ["Character", "&;"]]},
|
||||
|
||||
{"description":"Unescaped ampersand in attribute value",
|
||||
"input":"<h a='&'>",
|
||||
"output":[["StartTag", "h", { "a":"&" }]]},
|
||||
|
||||
{"description":"StartTag containing <",
|
||||
"input":"<a<b>",
|
||||
"output":[["StartTag", "a<b", { }]]},
|
||||
|
||||
{"description":"Non-void element containing trailing /",
|
||||
"input":"<h/>",
|
||||
"output":[["StartTag","h",{},true]]},
|
||||
|
||||
{"description":"Void element with permitted slash",
|
||||
"input":"<br/>",
|
||||
"output":[["StartTag","br",{},true]]},
|
||||
|
||||
{"description":"Void element with permitted slash (with attribute)",
|
||||
"input":"<br foo='bar'/>",
|
||||
"output":[["StartTag","br",{"foo":"bar"},true]]},
|
||||
|
||||
{"description":"StartTag containing /",
|
||||
"input":"<h/a='b'>",
|
||||
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
|
||||
|
||||
{"description":"Double-quoted attribute value",
|
||||
"input":"<h a=\"b\">",
|
||||
"output":[["StartTag", "h", { "a":"b" }]]},
|
||||
|
||||
{"description":"Unescaped </",
|
||||
"input":"</",
|
||||
"output":["ParseError", ["Character", "</"]]},
|
||||
|
||||
{"description":"Illegal end tag name",
|
||||
"input":"</1>",
|
||||
"output":["ParseError", ["Comment", "1"]]},
|
||||
|
||||
{"description":"Simili processing instruction",
|
||||
"input":"<?namespace>",
|
||||
"output":["ParseError", ["Comment", "?namespace"]]},
|
||||
|
||||
{"description":"A bogus comment stops at >, even if preceeded by two dashes",
|
||||
"input":"<?foo-->",
|
||||
"output":["ParseError", ["Comment", "?foo--"]]},
|
||||
|
||||
{"description":"Unescaped <",
|
||||
"input":"foo < bar",
|
||||
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
|
||||
|
||||
{"description":"Null Byte Replacement",
|
||||
"input":"\u0000",
|
||||
"output":["ParseError", ["Character", "\u0000"]]},
|
||||
|
||||
{"description":"Comment with dash",
|
||||
"input":"<!---x",
|
||||
"output":["ParseError", ["Comment", "-x"]]},
|
||||
|
||||
{"description":"Entity + newline",
|
||||
"input":"\nx\n>\n",
|
||||
"output":[["Character","\nx\n>\n"]]},
|
||||
|
||||
{"description":"Start tag with no attributes but space before the greater-than sign",
|
||||
"input":"<h >",
|
||||
"output":[["StartTag", "h", {}]]},
|
||||
|
||||
{"description":"Empty attribute followed by uppercase attribute",
|
||||
"input":"<h a B=''>",
|
||||
"output":[["StartTag", "h", {"a":"", "b":""}]]},
|
||||
|
||||
{"description":"Double-quote after attribute name",
|
||||
"input":"<h a \">",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"", "\"":""}]]},
|
||||
|
||||
{"description":"Single-quote after attribute name",
|
||||
"input":"<h a '>",
|
||||
"output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]},
|
||||
|
||||
{"description":"Empty end tag with following characters",
|
||||
"input":"a</>bc",
|
||||
"output":[["Character", "a"], "ParseError", ["Character", "bc"]]},
|
||||
|
||||
{"description":"Empty end tag with following tag",
|
||||
"input":"a</><b>c",
|
||||
"output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]]},
|
||||
|
||||
{"description":"Empty end tag with following comment",
|
||||
"input":"a</><!--b-->c",
|
||||
"output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]},
|
||||
|
||||
{"description":"Empty end tag with following end tag",
|
||||
"input":"a</></b>c",
|
||||
"output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]}
|
||||
|
||||
]}
|
||||
6047
html5lib/tests/testdata/tokenizer/test3.test
vendored
Normal file
6047
html5lib/tests/testdata/tokenizer/test3.test
vendored
Normal file
File diff suppressed because it is too large
Load Diff
344
html5lib/tests/testdata/tokenizer/test4.test
vendored
Normal file
344
html5lib/tests/testdata/tokenizer/test4.test
vendored
Normal file
@@ -0,0 +1,344 @@
|
||||
{"tests": [
|
||||
|
||||
{"description":"< in attribute name",
|
||||
"input":"<z/0 <>",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
|
||||
|
||||
{"description":"< in attribute value",
|
||||
"input":"<z x=<>",
|
||||
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
|
||||
|
||||
{"description":"= in unquoted attribute value",
|
||||
"input":"<z z=z=z>",
|
||||
"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
|
||||
|
||||
{"description":"= attribute",
|
||||
"input":"<z =>",
|
||||
"output":["ParseError", ["StartTag", "z", {"=": ""}]]},
|
||||
|
||||
{"description":"== attribute",
|
||||
"input":"<z ==>",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": ""}]]},
|
||||
|
||||
{"description":"=== attribute",
|
||||
"input":"<z ===>",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": "="}]]},
|
||||
|
||||
{"description":"==== attribute",
|
||||
"input":"<z ====>",
|
||||
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},
|
||||
|
||||
{"description":"Allowed \" after ampersand in attribute value",
|
||||
"input":"<z z=\"&\">",
|
||||
"output":[["StartTag", "z", {"z": "&"}]]},
|
||||
|
||||
{"description":"Non-allowed ' after ampersand in attribute value",
|
||||
"input":"<z z=\"&'\">",
|
||||
"output":["ParseError", ["StartTag", "z", {"z": "&'"}]]},
|
||||
|
||||
{"description":"Allowed ' after ampersand in attribute value",
|
||||
"input":"<z z='&'>",
|
||||
"output":[["StartTag", "z", {"z": "&"}]]},
|
||||
|
||||
{"description":"Non-allowed \" after ampersand in attribute value",
|
||||
"input":"<z z='&\"'>",
|
||||
"output":["ParseError", ["StartTag", "z", {"z": "&\""}]]},
|
||||
|
||||
{"description":"Text after bogus character reference",
|
||||
"input":"<z z='&xlink_xmlns;'>bar<z>",
|
||||
"output":["ParseError",["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
|
||||
|
||||
{"description":"Text after hex character reference",
|
||||
"input":"<z z='  foo'>bar<z>",
|
||||
"output":[["StartTag","z",{"z":" foo"}],["Character","bar"],["StartTag","z",{}]]},
|
||||
|
||||
{"description":"Attribute name starting with \"",
|
||||
"input":"<foo \"='bar'>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"\"": "bar"}]]},
|
||||
|
||||
{"description":"Attribute name starting with '",
|
||||
"input":"<foo '='bar'>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"'": "bar"}]]},
|
||||
|
||||
{"description":"Attribute name containing \"",
|
||||
"input":"<foo a\"b='bar'>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a\"b": "bar"}]]},
|
||||
|
||||
{"description":"Attribute name containing '",
|
||||
"input":"<foo a'b='bar'>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a'b": "bar"}]]},
|
||||
|
||||
{"description":"Unquoted attribute value containing '",
|
||||
"input":"<foo a=b'c>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a": "b'c"}]]},
|
||||
|
||||
{"description":"Unquoted attribute value containing \"",
|
||||
"input":"<foo a=b\"c>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a": "b\"c"}]]},
|
||||
|
||||
{"description":"Double-quoted attribute value not followed by whitespace",
|
||||
"input":"<foo a=\"b\"c>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
|
||||
|
||||
{"description":"Single-quoted attribute value not followed by whitespace",
|
||||
"input":"<foo a='b'c>",
|
||||
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
|
||||
|
||||
{"description":"Quoted attribute followed by permitted /",
|
||||
"input":"<br a='b'/>",
|
||||
"output":[["StartTag","br",{"a":"b"},true]]},
|
||||
|
||||
{"description":"Quoted attribute followed by non-permitted /",
|
||||
"input":"<bar a='b'/>",
|
||||
"output":[["StartTag","bar",{"a":"b"},true]]},
|
||||
|
||||
{"description":"CR EOF after doctype name",
|
||||
"input":"<!doctype html \r",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"CR EOF in tag name",
|
||||
"input":"<z\r",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"Slash EOF in tag name",
|
||||
"input":"<z/",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"Zero hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Zero decimal numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Zero-prefixed hex numeric entity",
|
||||
"input":"A",
|
||||
"output":[["Character", "A"]]},
|
||||
|
||||
{"description":"Zero-prefixed decimal numeric entity",
|
||||
"input":"A",
|
||||
"output":[["Character", "A"]]},
|
||||
|
||||
{"description":"Empty hex numeric entities",
|
||||
"input":"&#x &#X ",
|
||||
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
|
||||
|
||||
{"description":"Empty decimal numeric entities",
|
||||
"input":"&# &#; ",
|
||||
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
|
||||
|
||||
{"description":"Non-BMP numeric entity",
|
||||
"input":"𐀀",
|
||||
"output":[["Character", "\uD800\uDC00"]]},
|
||||
|
||||
{"description":"Maximum non-BMP numeric entity",
|
||||
"input":"",
|
||||
"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},
|
||||
|
||||
{"description":"Above maximum numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"32-bit hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"33-bit hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"33-bit decimal numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"65-bit hex numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"65-bit decimal numeric entity",
|
||||
"input":"�",
|
||||
"output":["ParseError", ["Character", "\uFFFD"]]},
|
||||
|
||||
{"description":"Surrogate code point edge cases",
|
||||
"input":"퟿����",
|
||||
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
|
||||
|
||||
{"description":"Uppercase start tag name",
|
||||
"input":"<X>",
|
||||
"output":[["StartTag", "x", {}]]},
|
||||
|
||||
{"description":"Uppercase end tag name",
|
||||
"input":"</X>",
|
||||
"output":[["EndTag", "x"]]},
|
||||
|
||||
{"description":"Uppercase attribute name",
|
||||
"input":"<x X>",
|
||||
"output":[["StartTag", "x", { "x":"" }]]},
|
||||
|
||||
{"description":"Tag/attribute name case edge values",
|
||||
"input":"<x@AZ[`az{ @AZ[`az{>",
|
||||
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
|
||||
|
||||
{"description":"Duplicate different-case attributes",
|
||||
"input":"<x x=1 x=2 X=3>",
|
||||
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
|
||||
|
||||
{"description":"Uppercase close tag attributes",
|
||||
"input":"</x X>",
|
||||
"output":["ParseError", ["EndTag", "x"]]},
|
||||
|
||||
{"description":"Duplicate close tag attributes",
|
||||
"input":"</x x x>",
|
||||
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
|
||||
|
||||
{"description":"Permitted slash",
|
||||
"input":"<br/>",
|
||||
"output":[["StartTag","br",{},true]]},
|
||||
|
||||
{"description":"Non-permitted slash",
|
||||
"input":"<xr/>",
|
||||
"output":[["StartTag","xr",{},true]]},
|
||||
|
||||
{"description":"Permitted slash but in close tag",
|
||||
"input":"</br/>",
|
||||
"output":["ParseError", ["EndTag", "br"]]},
|
||||
|
||||
{"description":"Doctype public case-sensitivity (1)",
|
||||
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
|
||||
"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
|
||||
|
||||
{"description":"Doctype public case-sensitivity (2)",
|
||||
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
|
||||
"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
|
||||
|
||||
{"description":"Doctype system case-sensitivity (1)",
|
||||
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
|
||||
"output":[["DOCTYPE", "html", null, "XyZ", true]]},
|
||||
|
||||
{"description":"Doctype system case-sensitivity (2)",
|
||||
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
|
||||
"output":[["DOCTYPE", "html", null, "xYz", true]]},
|
||||
|
||||
{"description":"U+0000 in lookahead region after non-matching character",
|
||||
"input":"<!doc>\u0000",
|
||||
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"U+0000 in lookahead region",
|
||||
"input":"<!doc\u0000",
|
||||
"output":["ParseError", ["Comment", "doc\uFFFD"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"U+0080 in lookahead region",
|
||||
"input":"<!doc\u0080",
|
||||
"output":["ParseError", "ParseError", ["Comment", "doc\u0080"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"U+FDD1 in lookahead region",
|
||||
"input":"<!doc\uFDD1",
|
||||
"output":["ParseError", "ParseError", ["Comment", "doc\uFDD1"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"U+1FFFF in lookahead region",
|
||||
"input":"<!doc\uD83F\uDFFF",
|
||||
"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
|
||||
"ignoreErrorOrder":true},
|
||||
|
||||
{"description":"CR followed by non-LF",
|
||||
"input":"\r?",
|
||||
"output":[["Character", "\n?"]]},
|
||||
|
||||
{"description":"CR at EOF",
|
||||
"input":"\r",
|
||||
"output":[["Character", "\n"]]},
|
||||
|
||||
{"description":"LF at EOF",
|
||||
"input":"\n",
|
||||
"output":[["Character", "\n"]]},
|
||||
|
||||
{"description":"CR LF",
|
||||
"input":"\r\n",
|
||||
"output":[["Character", "\n"]]},
|
||||
|
||||
{"description":"CR CR",
|
||||
"input":"\r\r",
|
||||
"output":[["Character", "\n\n"]]},
|
||||
|
||||
{"description":"LF LF",
|
||||
"input":"\n\n",
|
||||
"output":[["Character", "\n\n"]]},
|
||||
|
||||
{"description":"LF CR",
|
||||
"input":"\n\r",
|
||||
"output":[["Character", "\n\n"]]},
|
||||
|
||||
{"description":"text CR CR CR text",
|
||||
"input":"text\r\r\rtext",
|
||||
"output":[["Character", "text\n\n\ntext"]]},
|
||||
|
||||
{"description":"Doctype publik",
|
||||
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype publi",
|
||||
"input":"<!DOCTYPE html PUBLI",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype sistem",
|
||||
"input":"<!DOCTYPE html SISTEM \"AbC\">",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype sys",
|
||||
"input":"<!DOCTYPE html SYS",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
|
||||
|
||||
{"description":"Doctype html x>text",
|
||||
"input":"<!DOCTYPE html x>text",
|
||||
"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
|
||||
|
||||
{"description":"Grave accent in unquoted attribute",
|
||||
"input":"<a a=aa`>",
|
||||
"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
|
||||
|
||||
{"description":"EOF in tag name state ",
|
||||
"input":"<a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in tag name state",
|
||||
"input":"<a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in before attribute name state",
|
||||
"input":"<a ",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in attribute name state",
|
||||
"input":"<a a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in after attribute name state",
|
||||
"input":"<a a ",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in before attribute value state",
|
||||
"input":"<a a =",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in attribute value (double quoted) state",
|
||||
"input":"<a a =\"a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in attribute value (single quoted) state",
|
||||
"input":"<a a ='a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in attribute value (unquoted) state",
|
||||
"input":"<a a =a",
|
||||
"output":["ParseError"]},
|
||||
|
||||
{"description":"EOF in after attribute value state",
|
||||
"input":"<a a ='a'",
|
||||
"output":["ParseError"]}
|
||||
|
||||
]}
|
||||
1295
html5lib/tests/testdata/tokenizer/unicodeChars.test
vendored
Normal file
1295
html5lib/tests/testdata/tokenizer/unicodeChars.test
vendored
Normal file
File diff suppressed because it is too large
Load Diff
27
html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test
vendored
Normal file
27
html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
{"tests" : [
|
||||
{"description": "Invalid Unicode character U+DFFF",
|
||||
"doubleEscaped":true,
|
||||
"input": "\\uDFFF",
|
||||
"output":["ParseError", ["Character", "\\uFFFD"]]},
|
||||
|
||||
{"description": "Invalid Unicode character U+D800",
|
||||
"doubleEscaped":true,
|
||||
"input": "\\uD800",
|
||||
"output":["ParseError", ["Character", "\\uFFFD"]]},
|
||||
|
||||
{"description": "Invalid Unicode character U+DFFF with valid preceding character",
|
||||
"doubleEscaped":true,
|
||||
"input": "a\\uDFFF",
|
||||
"output":["ParseError", ["Character", "a\\uFFFD"]]},
|
||||
|
||||
{"description": "Invalid Unicode character U+D800 with valid following character",
|
||||
"doubleEscaped":true,
|
||||
"input": "\\uD800a",
|
||||
"output":["ParseError", ["Character", "\\uFFFDa"]]},
|
||||
|
||||
{"description":"CR followed by U+0000",
|
||||
"input":"\r\u0000",
|
||||
"output":[["Character", "\n"], "ParseError", ["Character", "\u0000"]],
|
||||
"ignoreErrorOrder":true}
|
||||
]
|
||||
}
|
||||
22
html5lib/tests/testdata/tokenizer/xmlViolation.test
vendored
Normal file
22
html5lib/tests/testdata/tokenizer/xmlViolation.test
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
{"xmlViolationTests": [
|
||||
|
||||
{"description":"Non-XML character",
|
||||
"input":"a\uFFFFb",
|
||||
"ignoreErrorOrder":true,
|
||||
"output":["ParseError",["Character","a\uFFFDb"]]},
|
||||
|
||||
{"description":"Non-XML space",
|
||||
"input":"a\u000Cb",
|
||||
"ignoreErrorOrder":true,
|
||||
"output":[["Character","a b"]]},
|
||||
|
||||
{"description":"Double hyphen in comment",
|
||||
"input":"<!-- foo -- bar -->",
|
||||
"output":["ParseError",["Comment"," foo - - bar "]]},
|
||||
|
||||
{"description":"FF between attributes",
|
||||
"input":"<a b=''\u000Cc=''>",
|
||||
"output":[["StartTag","a",{"b":"","c":""}]]}
|
||||
]}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user