Merged in AdeHub/bugfix - fixed conflict with adding lib folder to path

This commit is contained in:
rembo10
2014-04-01 10:57:17 -07:00
149 changed files with 7652 additions and 81827 deletions

View File

@@ -207,6 +207,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
myDB.action("DELETE FROM allalbums WHERE AlbumID=?", [items['AlbumID']])
myDB.action("DELETE FROM tracks WHERE AlbumID=?", [items['AlbumID']])
myDB.action("DELETE FROM alltracks WHERE AlbumID=?", [items['AlbumID']])
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [items['AlbumID']])
logger.info("[%s] Removing all references to release group %s to reflect MusicBrainz" % (artist['artist_name'], items['AlbumID']))
force_repackage = 1
else:
@@ -276,6 +277,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
myDB.action("DELETE from allalbums WHERE ReleaseID=?", [rg['id']])
myDB.action("DELETE from tracks WHERE ReleaseID=?", [rg['id']])
myDB.action("DELETE from alltracks WHERE ReleaseID=?", [rg['id']])
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rg['id']])
# This will be used later to build a hybrid release
fullreleaselist = []
#Search for releases within a release group

View File

@@ -46,9 +46,9 @@ def sendNZB(nzb):
nzbGetRPC = xmlrpclib.ServerProxy(url)
try:
if nzbGetRPC.writelog("INFO", "headphones connected to drop of %s any moment now." % (nzb.name + ".nzb")):
logger.debug(u"Successful connected to NZBget")
logger.debug(u"Successfully connected to NZBget")
else:
logger.error(u"Successful connected to NZBget, but unable to send a message" % (nzb.name + ".nzb"))
logger.info(u"Successfully connected to NZBget, but unable to send a message" % (nzb.name + ".nzb"))
except httplib.socket.error:
logger.error(u"Please check your NZBget host and port (if it is running). NZBget is not responding to this combination")
@@ -74,7 +74,7 @@ def sendNZB(nzb):
nzbcontent64 = standard_b64encode(data)
logger.error(u"Sending NZB to NZBget")
logger.info(u"Sending NZB to NZBget")
logger.debug(u"URL: " + url)
if nzbGetRPC.append(nzb.name + ".nzb", headphones.NZBGET_CATEGORY, addToTop, nzbcontent64):

View File

@@ -258,7 +258,7 @@ def searchNZB(albumid=None, new=False, losslessOnly=False):
if headphones.NEWZNAB:
newznab_hosts = [(headphones.NEWZNAB_HOST, headphones.NEWZNAB_APIKEY, headphones.NEWZNAB_ENABLED)]
newznab_hosts = []
for newznab_host in headphones.EXTRA_NEWZNABS:
if newznab_host[2] == '1' or newznab_host[2] == 1:
@@ -555,7 +555,7 @@ def searchNZB(albumid=None, new=False, losslessOnly=False):
for result in resultlist:
if high_size_limit and (result[1] > high_size_limit):
if high_size_limit and (int(result[1]) > high_size_limit):
logger.info(result[0] + " is too large for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Maxsize: " + helpers.bytes_to_mb(high_size_limit) + ")")
@@ -565,7 +565,7 @@ def searchNZB(albumid=None, new=False, losslessOnly=False):
continue
if low_size_limit and (result[1] < low_size_limit):
if low_size_limit and (int(result[1]) < low_size_limit):
logger.info(result[0] + " is too small for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Minsize: " + helpers.bytes_to_mb(low_size_limit) + ")")
continue
@@ -1415,7 +1415,7 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
for result in resultlist:
if high_size_limit and (result[1] > high_size_limit):
if high_size_limit and (int(result[1]) > high_size_limit):
logger.info(result[0] + " is too large for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Maxsize: " + helpers.bytes_to_mb(high_size_limit) + ")")
# Add lossless nzbs to the "flac list" which we can use if there are no good lossy matches
@@ -1424,7 +1424,7 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
continue
if low_size_limit and (result[1] < low_size_limit):
if low_size_limit and (int(result[1]) < low_size_limit):
logger.info(result[0] + " is too small for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Minsize: " + helpers.bytes_to_mb(low_size_limit) + ")")
continue

View File

@@ -175,6 +175,7 @@ class WebInterface(object):
myDB.action('DELETE from albums WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']])
myDB.action('DELETE from allalbums WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']])
myDB.action('DELETE from alltracks WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']])
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', album['AlbumID'])
raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID)
removeExtras.exposed = True
@@ -203,8 +204,18 @@ class WebInterface(object):
for name in namecheck:
artistname=name['ArtistName']
myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM albums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from albums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from tracks WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM allalbums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from allalbums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from alltracks WHERE ArtistID=?', [ArtistID])
myDB.action('UPDATE have SET Matched=NULL WHERE ArtistName=?', [artistname])
@@ -219,8 +230,18 @@ class WebInterface(object):
for ArtistID in emptyArtistIDs:
logger.info(u"Deleting all traces of artist: " + ArtistID)
myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM albums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from albums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from tracks WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM allalbums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from allalbums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from alltracks WHERE ArtistID=?', [ArtistID])
myDB.action('INSERT OR REPLACE into blacklist VALUES (?)', [ArtistID])
@@ -299,6 +320,7 @@ class WebInterface(object):
myDB.action('DELETE from tracks WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from allalbums WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from alltracks WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [AlbumID])
if ArtistID:
raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID)
else:
@@ -552,8 +574,18 @@ class WebInterface(object):
for ArtistID in args:
if action == 'delete':
myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM albums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from albums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from tracks WHERE ArtistID=?', [ArtistID])
rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM allalbums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
for rgid in rgids:
myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
myDB.action('DELETE from allalbums WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from alltracks WHERE AlbumID=?', [AlbumID])
myDB.action('INSERT OR REPLACE into blacklist VALUES (?)', [ArtistID])

View File

@@ -1,17 +0,0 @@
"""
HTML parsing library based on the WHATWG "HTML5"
specification. The parser is designed to be compatible with existing
HTML found in the wild and implements well-defined error recovery that
is largely compatible with modern desktop web browsers.
Example usage:
import html5lib
f = open("my_document.html")
tree = html5lib.parse(f)
"""
__version__ = "0.95-dev"
from html5parser import HTMLParser, parse, parseFragment
from treebuilders import getTreeBuilder
from treewalkers import getTreeWalker
from serializer import serialize

File diff suppressed because it is too large Load Diff

View File

@@ -1,127 +0,0 @@
#
# The goal is to finally have a form filler where you pass data for
# each form, using the algorithm for "Seeding a form with initial values"
# See http://www.whatwg.org/specs/web-forms/current-work/#seeding
#
import _base
from html5lib.constants import spaceCharacters
spaceCharacters = u"".join(spaceCharacters)
class SimpleFilter(_base.Filter):
def __init__(self, source, fieldStorage):
_base.Filter.__init__(self, source)
self.fieldStorage = fieldStorage
def __iter__(self):
field_indices = {}
state = None
field_name = None
for token in _base.Filter.__iter__(self):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
name = token["name"].lower()
if name == "input":
field_name = None
field_type = None
input_value_index = -1
input_checked_index = -1
for i,(n,v) in enumerate(token["data"]):
n = n.lower()
if n == u"name":
field_name = v.strip(spaceCharacters)
elif n == u"type":
field_type = v.strip(spaceCharacters)
elif n == u"checked":
input_checked_index = i
elif n == u"value":
input_value_index = i
value_list = self.fieldStorage.getlist(field_name)
field_index = field_indices.setdefault(field_name, 0)
if field_index < len(value_list):
value = value_list[field_index]
else:
value = ""
if field_type in (u"checkbox", u"radio"):
if value_list:
if token["data"][input_value_index][1] == value:
if input_checked_index < 0:
token["data"].append((u"checked", u""))
field_indices[field_name] = field_index + 1
elif input_checked_index >= 0:
del token["data"][input_checked_index]
elif field_type not in (u"button", u"submit", u"reset"):
if input_value_index >= 0:
token["data"][input_value_index] = (u"value", value)
else:
token["data"].append((u"value", value))
field_indices[field_name] = field_index + 1
field_type = None
field_name = None
elif name == "textarea":
field_type = "textarea"
field_name = dict((token["data"])[::-1])["name"]
elif name == "select":
field_type = "select"
attributes = dict(token["data"][::-1])
field_name = attributes.get("name")
is_select_multiple = "multiple" in attributes
is_selected_option_found = False
elif field_type == "select" and field_name and name == "option":
option_selected_index = -1
option_value = None
for i,(n,v) in enumerate(token["data"]):
n = n.lower()
if n == "selected":
option_selected_index = i
elif n == "value":
option_value = v.strip(spaceCharacters)
if option_value is None:
raise NotImplementedError("<option>s without a value= attribute")
else:
value_list = self.fieldStorage.getlist(field_name)
if value_list:
field_index = field_indices.setdefault(field_name, 0)
if field_index < len(value_list):
value = value_list[field_index]
else:
value = ""
if (is_select_multiple or not is_selected_option_found) and option_value == value:
if option_selected_index < 0:
token["data"].append((u"selected", u""))
field_indices[field_name] = field_index + 1
is_selected_option_found = True
elif option_selected_index >= 0:
del token["data"][option_selected_index]
elif field_type is not None and field_name and type == "EndTag":
name = token["name"].lower()
if name == field_type:
if name == "textarea":
value_list = self.fieldStorage.getlist(field_name)
if value_list:
field_index = field_indices.setdefault(field_name, 0)
if field_index < len(value_list):
value = value_list[field_index]
else:
value = ""
yield {"type": "Characters", "data": value}
field_indices[field_name] = field_index + 1
field_name = None
elif name == "option" and field_type == "select":
pass # TODO: part of "option without value= attribute" processing
elif field_type == "textarea":
continue # ignore token
yield token

View File

@@ -1,62 +0,0 @@
import _base
class Filter(_base.Filter):
def __init__(self, source, encoding):
_base.Filter.__init__(self, source)
self.encoding = encoding
def __iter__(self):
state = "pre_head"
meta_found = (self.encoding is None)
pending = []
for token in _base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag":
if token["name"].lower() == u"head":
state = "in_head"
elif type == "EmptyTag":
if token["name"].lower() == u"meta":
# replace charset with actual encoding
has_http_equiv_content_type = False
for (namespace,name),value in token["data"].iteritems():
if namespace != None:
continue
elif name.lower() == u'charset':
token["data"][(namespace,name)] = self.encoding
meta_found = True
break
elif name == u'http-equiv' and value.lower() == u'content-type':
has_http_equiv_content_type = True
else:
if has_http_equiv_content_type and (None, u"content") in token["data"]:
token["data"][(None, u"content")] = u'text/html; charset=%s' % self.encoding
meta_found = True
elif token["name"].lower() == u"head" and not meta_found:
# insert meta into empty head
yield {"type": "StartTag", "name": u"head",
"data": token["data"]}
yield {"type": "EmptyTag", "name": u"meta",
"data": {(None, u"charset"): self.encoding}}
yield {"type": "EndTag", "name": u"head"}
meta_found = True
continue
elif type == "EndTag":
if token["name"].lower() == u"head" and pending:
# insert meta into head (if necessary) and flush pending queue
yield pending.pop(0)
if not meta_found:
yield {"type": "EmptyTag", "name": u"meta",
"data": {(None, u"charset"): self.encoding}}
while pending:
yield pending.pop(0)
meta_found = True
state = "post_head"
if state == "in_head":
pending.append(token)
else:
yield token

View File

@@ -1,8 +0,0 @@
import _base
from html5lib.sanitizer import HTMLSanitizerMixin
class Filter(_base.Filter, HTMLSanitizerMixin):
def __iter__(self):
for token in _base.Filter.__iter__(self):
token = self.sanitize_token(token)
if token: yield token

View File

@@ -1,177 +0,0 @@
import re
baseChar = """[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
combiningCharacter = """[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A"""
digit = """[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
extender = """#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
letter = " | ".join([baseChar, ideographic])
#Without the
name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
extender])
nameFirst = " | ".join([letter, "_"])
reChar = re.compile(r"#x([\d|A-F]{4,4})")
reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
def charStringToList(chars):
charRanges = [item.strip() for item in chars.split(" | ")]
rv = []
for item in charRanges:
foundMatch = False
for regexp in (reChar, reCharRange):
match = regexp.match(item)
if match is not None:
rv.append([hexToInt(item) for item in match.groups()])
if len(rv[-1]) == 1:
rv[-1] = rv[-1]*2
foundMatch = True
break
if not foundMatch:
assert len(item) == 1
rv.append([ord(item)] * 2)
rv = normaliseCharList(rv)
return rv
def normaliseCharList(charList):
charList = sorted(charList)
for item in charList:
assert item[1] >= item[0]
rv = []
i = 0
while i < len(charList):
j = 1
rv.append(charList[i])
while i + j < len(charList) and charList[i+j][0] <= rv[-1][1] + 1:
rv[-1][1] = charList[i+j][1]
j += 1
i += j
return rv
#We don't really support characters above the BMP :(
max_unicode = int("FFFF", 16)
def missingRanges(charList):
rv = []
if charList[0] != 0:
rv.append([0, charList[0][0] - 1])
for i, item in enumerate(charList[:-1]):
rv.append([item[1]+1, charList[i+1][0] - 1])
if charList[-1][1] != max_unicode:
rv.append([charList[-1][1] + 1, max_unicode])
return rv
def listToRegexpStr(charList):
rv = []
for item in charList:
if item[0] == item[1]:
rv.append(escapeRegexp(unichr(item[0])))
else:
rv.append(escapeRegexp(unichr(item[0])) + "-" +
escapeRegexp(unichr(item[1])))
return "[%s]"%"".join(rv)
def hexToInt(hex_str):
return int(hex_str, 16)
def escapeRegexp(string):
specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
"[", "]", "|", "(", ")", "-")
for char in specialCharacters:
string = string.replace(char, "\\" + char)
if char in string:
print string
return string
#output from the above
nonXmlNameBMPRegexp = re.compile(u'[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
nonXmlNameFirstBMPRegexp = re.compile(u'[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
class InfosetFilter(object):
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
def __init__(self, replaceChars = None,
dropXmlnsLocalName = False,
dropXmlnsAttrNs = False,
preventDoubleDashComments = False,
preventDashAtCommentEnd = False,
replaceFormFeedCharacters = True):
self.dropXmlnsLocalName = dropXmlnsLocalName
self.dropXmlnsAttrNs = dropXmlnsAttrNs
self.preventDoubleDashComments = preventDoubleDashComments
self.preventDashAtCommentEnd = preventDashAtCommentEnd
self.replaceFormFeedCharacters = replaceFormFeedCharacters
self.replaceCache = {}
def coerceAttribute(self, name, namespace=None):
if self.dropXmlnsLocalName and name.startswith("xmlns:"):
#Need a datalosswarning here
return None
elif (self.dropXmlnsAttrNs and
namespace == "http://www.w3.org/2000/xmlns/"):
return None
else:
return self.toXmlName(name)
def coerceElement(self, name, namespace=None):
return self.toXmlName(name)
def coerceComment(self, data):
if self.preventDoubleDashComments:
while "--" in data:
data = data.replace("--", "- -")
return data
def coerceCharacters(self, data):
if self.replaceFormFeedCharacters:
data = data.replace("\x0C", " ")
#Other non-xml characters
return data
def toXmlName(self, name):
nameFirst = name[0]
nameRest = name[1:]
m = nonXmlNameFirstBMPRegexp.match(nameFirst)
if m:
nameFirstOutput = self.getReplacementCharacter(nameFirst)
else:
nameFirstOutput = nameFirst
nameRestOutput = nameRest
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
for char in replaceChars:
replacement = self.getReplacementCharacter(char)
nameRestOutput = nameRestOutput.replace(char, replacement)
return nameFirstOutput + nameRestOutput
def getReplacementCharacter(self, char):
if char in self.replaceCache:
replacement = self.replaceCache[char]
else:
replacement = self.escapeChar(char)
return replacement
def fromXmlName(self, name):
for item in set(self.replacementRegexp.findall(name)):
name = name.replace(item, self.unescapeChar(item))
return name
def escapeChar(self, char):
replacement = "U" + hex(ord(char))[2:].upper().rjust(5, "0")
self.replaceCache[char] = replacement
return replacement
def unescapeChar(self, charcode):
return unichr(int(charcode[1:], 16))

View File

@@ -1,258 +0,0 @@
import re
from xml.sax.saxutils import escape, unescape
from tokenizer import HTMLTokenizer
from constants import tokenTypes
class HTMLSanitizerMixin(object):
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
'munderover', 'none']
svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
'background', 'balance', 'bgcolor', 'bgproperties', 'border',
'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg',
'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
'width', 'wrap', 'xml:lang']
mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
'xlink:type', 'xmlns', 'xmlns:xlink']
svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
'arabic-form', 'ascent', 'attributeName', 'attributeType',
'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
'fill-opacity', 'fill-rule', 'font-family', 'font-size',
'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
'opacity', 'orient', 'origin', 'overline-position',
'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
'transform', 'type', 'u1', 'u2', 'underline-position',
'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
'y1', 'y2', 'zoomAndPan']
attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc',
'xlink:href', 'xml:base']
svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
'mask', 'stroke']
svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
'set', 'use']
acceptable_css_properties = ['azimuth', 'background-color',
'border-bottom-color', 'border-collapse', 'border-color',
'border-left-color', 'border-right-color', 'border-top-color', 'clear',
'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
'white-space', 'width']
acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
'transparent', 'underline', 'white', 'yellow']
acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule',
'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
'stroke-opacity']
acceptable_protocols = [ 'ed2k', 'ftp', 'http', 'https', 'irc',
'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
'ssh', 'sftp', 'rtsp', 'afs' ]
# subclasses may define their own versions of these constants
allowed_elements = acceptable_elements + mathml_elements + svg_elements
allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
allowed_css_properties = acceptable_css_properties
allowed_css_keywords = acceptable_css_keywords
allowed_svg_properties = acceptable_svg_properties
allowed_protocols = acceptable_protocols
# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
# attributes are parsed, and a restricted set, # specified by
# ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
# attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
# in ALLOWED_PROTOCOLS are allowed.
#
# sanitize_html('<script> do_nasty_stuff() </script>')
# => &lt;script> do_nasty_stuff() &lt;/script>
# sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
# => <a>Click here for $100</a>
def sanitize_token(self, token):
# accommodate filters which use token_type differently
token_type = token["type"]
if token_type in tokenTypes.keys():
token_type = tokenTypes[token_type]
if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]):
if token["name"] in self.allowed_elements:
if token.has_key("data"):
attrs = dict([(name,val) for name,val in
token["data"][::-1]
if name in self.allowed_attributes])
for attr in self.attr_val_is_uri:
if not attrs.has_key(attr):
continue
val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
unescape(attrs[attr])).lower()
#remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace(u"\ufffd", "")
if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
(val_unescaped.split(':')[0] not in
self.allowed_protocols)):
del attrs[attr]
for attr in self.svg_attr_val_allows_ref:
if attr in attrs:
attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
' ',
unescape(attrs[attr]))
if (token["name"] in self.svg_allow_local_href and
'xlink:href' in attrs and re.search('^\s*[^#\s].*',
attrs['xlink:href'])):
del attrs['xlink:href']
if attrs.has_key('style'):
attrs['style'] = self.sanitize_css(attrs['style'])
token["data"] = [[name,val] for name,val in attrs.items()]
return token
else:
if token_type == tokenTypes["EndTag"]:
token["data"] = "</%s>" % token["name"]
elif token["data"]:
attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]])
token["data"] = "<%s%s>" % (token["name"],attrs)
else:
token["data"] = "<%s>" % token["name"]
if token.get("selfClosing"):
token["data"]=token["data"][:-1] + "/>"
if token["type"] in tokenTypes.keys():
token["type"] = "Characters"
else:
token["type"] = tokenTypes["Characters"]
del token["name"]
return token
elif token_type == tokenTypes["Comment"]:
pass
else:
return token
def sanitize_css(self, style):
# disallow urls
style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style)
# gauntlet
if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return ''
if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return ''
clean = []
for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style):
if not value: continue
if prop.lower() in self.allowed_css_properties:
clean.append(prop + ': ' + value + ';')
elif prop.split('-')[0].lower() in ['background','border','margin',
'padding']:
for keyword in value.split():
if not keyword in self.acceptable_css_keywords and \
not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$",keyword):
break
else:
clean.append(prop + ': ' + value + ';')
elif prop.lower() in self.allowed_svg_properties:
clean.append(prop + ': ' + value + ';')
return ' '.join(clean)
class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
lowercaseElementName=False, lowercaseAttrName=False, parser=None):
#Change case matching defaults as we only output lowercase html anyway
#This solution doesn't seem ideal...
HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
lowercaseElementName, lowercaseAttrName, parser=parser)
def __iter__(self):
for token in HTMLTokenizer.__iter__(self):
token = self.sanitize_token(token)
if token:
yield token

View File

@@ -1,17 +0,0 @@
from html5lib import treewalkers
from htmlserializer import HTMLSerializer
from xhtmlserializer import XHTMLSerializer
def serialize(input, tree="simpletree", format="html", encoding=None,
**serializer_opts):
# XXX: Should we cache this?
walker = treewalkers.getTreeWalker(tree)
if format == "html":
s = HTMLSerializer(**serializer_opts)
elif format == "xhtml":
s = XHTMLSerializer(**serializer_opts)
else:
raise ValueError, "type must be either html or xhtml"
return s.render(walker(input), encoding)

View File

@@ -1,9 +0,0 @@
from htmlserializer import HTMLSerializer
class XHTMLSerializer(HTMLSerializer):
quote_attr_values = True
minimize_boolean_attributes = False
use_trailing_solidus = True
escape_lt_in_attrs = True
omit_optional_tags = False
escape_rcdata = True

View File

@@ -1,12 +0,0 @@
import sys
import os
parent_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], ".."))
if not parent_path in sys.path:
sys.path.insert(0, parent_path)
del parent_path
from runtests import buildTestSuite
import support

View File

@@ -1,37 +0,0 @@
import sys
import os
if __name__ == '__main__':
#Allow us to import from the src directory
os.chdir(os.path.split(os.path.abspath(__file__))[0])
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
from tokenizer import HTMLTokenizer
class HTMLParser(object):
""" Fake parser to test tokenizer output """
def parse(self, stream, output=True):
tokenizer = HTMLTokenizer(stream)
for token in tokenizer:
if output:
print token
if __name__ == "__main__":
x = HTMLParser()
if len(sys.argv) > 1:
if len(sys.argv) > 2:
import hotshot, hotshot.stats
prof = hotshot.Profile('stats.prof')
prof.runcall(x.parse, sys.argv[1], False)
prof.close()
stats = hotshot.stats.load('stats.prof')
stats.strip_dirs()
stats.sort_stats('time')
stats.print_stats()
else:
x.parse(sys.argv[1])
else:
print """Usage: python mockParser.py filename [stats]
If stats is specified the hotshots profiler will run and output the
stats instead.
"""

View File

@@ -1,27 +0,0 @@
import sys
import os
import glob
import unittest
#Allow us to import the parent module
os.chdir(os.path.split(os.path.abspath(__file__))[0])
sys.path.insert(0, os.path.abspath(os.curdir))
sys.path.insert(0, os.path.abspath(os.pardir))
sys.path.insert(0, os.path.join(os.path.abspath(os.pardir), "src"))
def buildTestSuite():
suite = unittest.TestSuite()
for testcase in glob.glob('test_*.py'):
if testcase in ("test_tokenizer.py", "test_parser.py", "test_parser2.py"):
module = os.path.splitext(testcase)[0]
suite.addTest(__import__(module).buildTestSuite())
return suite
def main():
results = unittest.TextTestRunner().run(buildTestSuite())
return results
if __name__ == "__main__":
results = main()
if not results.wasSuccessful():
sys.exit(1)

View File

@@ -1,20 +0,0 @@
import sys
import os
import glob
import unittest
def buildTestSuite():
suite = unittest.TestSuite()
for testcase in glob.glob('test_*.py'):
module = os.path.splitext(testcase)[0]
suite.addTest(__import__(module).buildTestSuite())
return suite
def main():
results = unittest.TextTestRunner().run(buildTestSuite())
return results
if __name__ == "__main__":
results = main()
if not results.wasSuccessful():
sys.exit(1)

View File

@@ -1,127 +0,0 @@
import os
import sys
import codecs
import glob
base_path = os.path.split(__file__)[0]
if os.path.exists(os.path.join(base_path, 'testdata')):
#release
test_dir = os.path.join(base_path, 'testdata')
else:
#development
test_dir = os.path.abspath(
os.path.join(base_path,
os.path.pardir, os.path.pardir,
os.path.pardir, 'testdata'))
assert os.path.exists(test_dir), "Test data not found"
#import the development html5lib
sys.path.insert(0, os.path.abspath(os.path.join(base_path,
os.path.pardir,
os.path.pardir)))
import html5lib
from html5lib import html5parser, treebuilders
del base_path
#Build a dict of avaliable trees
treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
"DOM":treebuilders.getTreeBuilder("dom")}
#Try whatever etree implementations are avaliable from a list that are
#"supposed" to work
try:
import xml.etree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
try:
import elementtree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
pass
try:
import xml.etree.cElementTree as cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
try:
import cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
pass
try:
import lxml.etree as lxml
treeTypes['lxml'] = treebuilders.getTreeBuilder("etree", lxml, fullTree=True)
except ImportError:
pass
try:
import BeautifulSoup
treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
except ImportError:
pass
def html5lib_test_files(subdirectory, files='*.dat'):
return glob.glob(os.path.join(test_dir,subdirectory,files))
class DefaultDict(dict):
def __init__(self, default, *args, **kwargs):
self.default = default
dict.__init__(self, *args, **kwargs)
def __getitem__(self, key):
return dict.get(self, key, self.default)
class TestData(object):
def __init__(self, filename, newTestHeading="data"):
self.f = codecs.open(filename, encoding="utf8")
self.newTestHeading = newTestHeading
def __iter__(self):
data = DefaultDict(None)
key=None
for line in self.f:
heading = self.isSectionHeading(line)
if heading:
if data and heading == self.newTestHeading:
#Remove trailing newline
data[key] = data[key][:-1]
yield self.normaliseOutput(data)
data = DefaultDict(None)
key = heading
data[key]=""
elif key is not None:
data[key] += line
if data:
yield self.normaliseOutput(data)
def isSectionHeading(self, line):
"""If the current heading is a test section heading return the heading,
otherwise return False"""
if line.startswith("#"):
return line[1:].strip()
else:
return False
def normaliseOutput(self, data):
#Remove trailing newlines
for key,value in data.iteritems():
if value.endswith("\n"):
data[key] = value[:-1]
return data
def convert(stripChars):
def convertData(data):
"""convert the output of str(document) to the format used in the testcases"""
data = data.split("\n")
rv = []
for line in data:
if line.startswith("|"):
rv.append(line[stripChars:])
else:
rv.append(line)
return "\n".join(rv)
return convertData
convertExpected = convert(2)

View File

@@ -1,54 +0,0 @@
import os
import unittest
from support import html5lib_test_files, TestData, test_dir
from html5lib import HTMLParser, inputstream
import re, unittest
class Html5EncodingTestCase(unittest.TestCase):
def test_codec_name(self):
self.assertEquals(inputstream.codecName("utf-8"), "utf-8")
self.assertEquals(inputstream.codecName("utf8"), "utf-8")
self.assertEquals(inputstream.codecName(" utf8 "), "utf-8")
self.assertEquals(inputstream.codecName("ISO_8859--1"), "windows-1252")
def buildTestSuite():
for filename in html5lib_test_files("encoding"):
test_name = os.path.basename(filename).replace('.dat',''). \
replace('-','')
tests = TestData(filename, "data")
for idx, test in enumerate(tests):
def encodingTest(self, data=test['data'],
encoding=test['encoding']):
p = HTMLParser()
t = p.parse(data, useChardet=False)
errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n"%
(data, repr(encoding.lower()),
repr(p.tokenizer.stream.charEncoding)))
self.assertEquals(encoding.lower(),
p.tokenizer.stream.charEncoding[0],
errorMessage)
setattr(Html5EncodingTestCase, 'test_%s_%d' % (test_name, idx+1),
encodingTest)
try:
import chardet
def test_chardet(self):
data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt")).read()
encoding = inputstream.HTMLInputStream(data).charEncoding
assert encoding[0].lower() == "big5"
setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)
except ImportError:
print "chardet not found, skipping chardet tests"
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == "__main__":
main()

View File

@@ -1,296 +0,0 @@
import sys
import unittest
from html5lib.filters.formfiller import SimpleFilter
class FieldStorage(dict):
def getlist(self, name):
l = self[name]
if isinstance(l, list):
return l
elif isinstance(l, tuple) or hasattr(l, '__iter__'):
return list(l)
return [l]
class TestCase(unittest.TestCase):
def runTest(self, input, formdata, expected):
try:
output = list(SimpleFilter(input, formdata))
except NotImplementedError, nie:
# Amnesty for those that confess...
print >>sys.stderr, "Not implemented:", str(nie)
else:
errorMsg = "\n".join(["\n\nInput:", str(input),
"\nForm data:", str(formdata),
"\nExpected:", str(expected),
"\nReceived:", str(output)])
self.assertEquals(output, expected, errorMsg)
def testSingleTextInputWithValue(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"quux")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"bar")]}])
def testSingleTextInputWithoutValue(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"text"), (u"name", u"foo")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"bar")]}])
def testSingleCheckbox(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}])
def testSingleCheckboxShouldBeUnchecked(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}])
def testSingleCheckboxCheckedByDefault(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}])
def testSingleCheckboxCheckedByDefaultShouldBeUnchecked(self):
self.runTest(
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux"), (u"checked", u"")]}],
FieldStorage({"foo": "bar"}),
[{"type": u"EmptyTag", "name": u"input",
"data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}])
def testSingleTextareaWithValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"textarea", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"textarea", "data": []}])
def testSingleTextareaWithoutValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
{"type": u"EndTag", "name": u"textarea", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"textarea", "data": []}])
def testSingleSelectWithValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectWithValueShouldBeUnselected(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "quux"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectWithoutValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"selected", u"")]},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectWithoutValueShouldBeUnselected(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "quux"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectTwoOptionsWithValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectTwoOptionsWithValueShouldBeUnselected(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"baz")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "quux"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"baz")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectTwoOptionsWithoutValue(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "bar"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"selected", u"")]},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectTwoOptionsWithoutValueShouldBeUnselected(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"baz"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": "quux"}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"bar"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": []},
{"type": u"Characters", "data": u"baz"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testSingleSelectMultiple(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo"), (u"multiple", u"")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": ["bar", "quux"]}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo"), (u"multiple", u"")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def testTwoSelect(self):
self.runTest(
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []},
{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}],
FieldStorage({"foo": ["bar", "quux"]}),
[{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []},
{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux"), (u"selected", u"")]},
{"type": u"Characters", "data": u"quux"},
{"type": u"EndTag", "name": u"option", "data": []},
{"type": u"EndTag", "name": u"select", "data": []}])
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == "__main__":
main()

View File

@@ -1,140 +0,0 @@
import os
import sys
import traceback
import StringIO
import warnings
import re
warnings.simplefilter("error")
from support import html5lib_test_files as data_files
from support import TestData, convert, convertExpected
import html5lib
from html5lib import html5parser, treebuilders, constants
treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
"DOM":treebuilders.getTreeBuilder("dom")}
#Try whatever etree implementations are avaliable from a list that are
#"supposed" to work
try:
import xml.etree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
try:
import elementtree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
pass
try:
import xml.etree.cElementTree as cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
try:
import cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
pass
try:
try:
import lxml.html as lxml
except ImportError:
import lxml.etree as lxml
treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml", lxml, fullTree=True)
except ImportError:
pass
try:
import BeautifulSoup
treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
except ImportError:
pass
#Try whatever dom implementations are avaliable from a list that are
#"supposed" to work
try:
import pxdom
treeTypes["pxdom"] = treebuilders.getTreeBuilder("dom", pxdom)
except ImportError:
pass
#Run the parse error checks
checkParseErrors = False
#XXX - There should just be one function here but for some reason the testcase
#format differs from the treedump format by a single space character
def convertTreeDump(data):
return "\n".join(convert(3)(data).split("\n")[1:])
namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub
def runParserTest(innerHTML, input, expected, errors, treeClass,
namespaceHTMLElements):
#XXX - move this out into the setup function
#concatenate all consecutive character tokens into a single token
try:
p = html5parser.HTMLParser(tree = treeClass,
namespaceHTMLElements=namespaceHTMLElements)
except constants.DataLossWarning:
return
try:
if innerHTML:
document = p.parseFragment(input, innerHTML)
else:
try:
document = p.parse(input)
except constants.DataLossWarning:
return
except:
errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
u"\nTraceback:", traceback.format_exc()])
assert False, errorMsg.encode("utf8")
output = convertTreeDump(p.tree.testSerializer(document))
expected = convertExpected(expected)
if namespaceHTMLElements:
expected = namespaceExpected(r"\1<html \2>", expected)
errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
u"\nReceived:", output])
assert expected == output, errorMsg.encode("utf8")
errStr = [u"Line: %i Col: %i %s"%(line, col,
constants.E[errorcode] % datavars if isinstance(datavars, dict) else (datavars,)) for
((line,col), errorcode, datavars) in p.errors]
errorMsg2 = u"\n".join([u"\n\nInput:", input,
u"\nExpected errors (" + str(len(errors)) + u"):\n" + u"\n".join(errors),
u"\nActual errors (" + str(len(p.errors)) + u"):\n" + u"\n".join(errStr)])
if checkParseErrors:
assert len(p.errors) == len(errors), errorMsg2.encode("utf-8")
def test_parser():
sys.stderr.write('Testing tree builders '+ " ".join(treeTypes.keys()) + "\n")
files = data_files('tree-construction')
for filename in files:
testName = os.path.basename(filename).replace(".dat","")
tests = TestData(filename, "data")
for index, test in enumerate(tests):
input, errors, innerHTML, expected = [test[key] for key in
'data', 'errors',
'document-fragment',
'document']
if errors:
errors = errors.split("\n")
for treeName, treeCls in treeTypes.iteritems():
for namespaceHTMLElements in (True, False):
print input
yield (runParserTest, innerHTML, input, expected, errors, treeCls,
namespaceHTMLElements)
break

View File

@@ -1,39 +0,0 @@
import support
from html5lib import html5parser
from html5lib.constants import namespaces
from html5lib.treebuilders import dom
import unittest
# tests that aren't autogenerated from text files
class MoreParserTests(unittest.TestCase):
def test_assertDoctypeCloneable(self):
parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
doc = parser.parse('<!DOCTYPE HTML>')
self.assert_(doc.cloneNode(True))
def test_line_counter(self):
# http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
parser.parse("<pre>\nx\n&gt;\n</pre>")
def test_namespace_html_elements_0(self):
parser = html5parser.HTMLParser(namespaceHTMLElements=True)
doc = parser.parse("<html></html>")
self.assert_(doc.childNodes[0].namespace == namespaces["html"])
def test_namespace_html_elements_1(self):
parser = html5parser.HTMLParser(namespaceHTMLElements=False)
doc = parser.parse("<html></html>")
self.assert_(doc.childNodes[0].namespace == None)
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == '__main__':
main()

View File

@@ -1,76 +0,0 @@
import os
import sys
import unittest
try:
import json
except ImportError:
import simplejson as json
from html5lib import html5parser, sanitizer, constants
def runSanitizerTest(name, expected, input):
expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
parseFragment(expected).childNodes])
expected = json.loads(json.dumps(expected))
assert expected == sanitize_html(input)
def sanitize_html(stream):
return ''.join([token.toxml() for token in
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
parseFragment(stream).childNodes])
def test_should_handle_astral_plane_characters():
assert u"<p>\U0001d4b5 \U0001d538</p>" == sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
def test_sanitizer():
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
continue ### TODO
if tag_name != tag_name.lower():
continue ### TODO
if tag_name == 'image':
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
elif tag_name == 'br':
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
elif tag_name in constants.voidElements:
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
else:
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name,tag_name),
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
tag_name = tag_name.upper()
yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
"&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name,tag_name),
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
if attribute_name != attribute_name.lower(): continue ### TODO
if attribute_name == 'style': continue
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
"<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
"<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
attribute_name = attribute_name.upper()
yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
"<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
"<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
"<a href=\"%s\">foo</a>" % protocol,
"""<a href="%s">foo</a>""" % protocol)
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
"<a href=\"%s\">foo</a>" % protocol,
"""<a href="%s">foo</a>""" % protocol)

View File

@@ -1,180 +0,0 @@
import os
import unittest
from support import html5lib_test_files
try:
import json
except ImportError:
import simplejson as json
import html5lib
from html5lib import html5parser, serializer, constants
from html5lib.treewalkers._base import TreeWalker
optionals_loaded = []
try:
from lxml import etree
optionals_loaded.append("lxml")
except ImportError:
pass
default_namespace = constants.namespaces["html"]
class JsonWalker(TreeWalker):
def __iter__(self):
for token in self.tree:
type = token[0]
if type == "StartTag":
if len(token) == 4:
namespace, name, attrib = token[1:4]
else:
namespace = default_namespace
name, attrib = token[1:3]
yield self.startTag(namespace, name, self._convertAttrib(attrib))
elif type == "EndTag":
if len(token) == 3:
namespace, name = token[1:3]
else:
namespace = default_namespace
name = token[1]
yield self.endTag(namespace, name)
elif type == "EmptyTag":
if len(token) == 4:
namespace, name, attrib = token[1:]
else:
namespace = default_namespace
name, attrib = token[1:]
for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
yield token
elif type == "Comment":
yield self.comment(token[1])
elif type in ("Characters", "SpaceCharacters"):
for token in self.text(token[1]):
yield token
elif type == "Doctype":
if len(token) == 4:
yield self.doctype(token[1], token[2], token[3])
elif len(token) == 3:
yield self.doctype(token[1], token[2])
else:
yield self.doctype(token[1])
else:
raise ValueError("Unknown token type: " + type)
def _convertAttrib(self, attribs):
"""html5lib tree-walkers use a dict of (namespace, name): value for
attributes, but JSON cannot represent this. Convert from the format
in the serializer tests (a list of dicts with "namespace", "name",
and "value" as keys) to html5lib's tree-walker format."""
attrs = {}
for attrib in attribs:
name = (attrib["namespace"], attrib["name"])
assert(name not in attrs)
attrs[name] = attrib["value"]
return attrs
def serialize_html(input, options):
options = dict([(str(k),v) for k,v in options.iteritems()])
return serializer.HTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
def serialize_xhtml(input, options):
options = dict([(str(k),v) for k,v in options.iteritems()])
return serializer.XHTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
def make_test(input, expected, xhtml, options):
result = serialize_html(input, options)
if len(expected) == 1:
assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0], result, str(options))
elif result not in expected:
assert False, "Expected: %s, Received: %s" % (expected, result)
if not xhtml:
return
result = serialize_xhtml(input, options)
if len(xhtml) == 1:
assert xhtml[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:True\n%s"%(xhtml[0], result, str(options))
elif result not in xhtml:
assert False, "Expected: %s, Received: %s" % (xhtml, result)
class EncodingTestCase(unittest.TestCase):
def throwsWithLatin1(self, input):
self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"})
def testDoctypeName(self):
self.throwsWithLatin1([["Doctype", u"\u0101"]])
def testDoctypePublicId(self):
self.throwsWithLatin1([["Doctype", u"potato", u"\u0101"]])
def testDoctypeSystemId(self):
self.throwsWithLatin1([["Doctype", u"potato", u"potato", u"\u0101"]])
def testCdataCharacters(self):
self.assertEquals("<style>&amacr;", serialize_html([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}],
["Characters", u"\u0101"]],
{"encoding": "iso-8859-1"}))
def testCharacters(self):
self.assertEquals("&amacr;", serialize_html([["Characters", u"\u0101"]],
{"encoding": "iso-8859-1"}))
def testStartTagName(self):
self.throwsWithLatin1([["StartTag", u"http://www.w3.org/1999/xhtml", u"\u0101", []]])
def testEmptyTagName(self):
self.throwsWithLatin1([["EmptyTag", u"http://www.w3.org/1999/xhtml", u"\u0101", []]])
def testAttributeName(self):
self.throwsWithLatin1([["StartTag", u"http://www.w3.org/1999/xhtml", u"span", [{"namespace": None, "name": u"\u0101", "value": u"potato"}]]])
def testAttributeValue(self):
self.assertEquals("<span potato=&amacr;>", serialize_html([["StartTag", u"http://www.w3.org/1999/xhtml", u"span",
[{"namespace": None, "name": u"potato", "value": u"\u0101"}]]],
{"encoding": "iso-8859-1"}))
def testEndTagName(self):
self.throwsWithLatin1([["EndTag", u"http://www.w3.org/1999/xhtml", u"\u0101"]])
def testComment(self):
self.throwsWithLatin1([["Comment", u"\u0101"]])
if "lxml" in optionals_loaded:
class LxmlTestCase(unittest.TestCase):
def setUp(self):
self.parser = etree.XMLParser(resolve_entities=False)
self.treewalker = html5lib.getTreeWalker("lxml")
self.serializer = serializer.HTMLSerializer()
def testEntityReplacement(self):
doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
tree = etree.fromstring(doc, parser = self.parser).getroottree()
result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)
def testEntityXML(self):
doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
tree = etree.fromstring(doc, parser = self.parser).getroottree()
result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)
def testEntityNoResolve(self):
doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
tree = etree.fromstring(doc, parser = self.parser).getroottree()
result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False,
resolve_entities=False)
self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)
def test_serializer():
for filename in html5lib_test_files('serializer', '*.test'):
tests = json.load(file(filename))
test_name = os.path.basename(filename).replace('.test','')
for index, test in enumerate(tests['tests']):
xhtml = test.get("xhtml", test["expected"])
if test_name == 'optionaltags':
xhtml = None
yield make_test, test["input"], test["expected"], xhtml, test.get("options", {})

View File

@@ -1,97 +0,0 @@
import support
import unittest, codecs
from html5lib.inputstream import HTMLInputStream
class HTMLInputStreamShortChunk(HTMLInputStream):
_defaultChunkSize = 2
class HTMLInputStreamTest(unittest.TestCase):
def test_char_ascii(self):
stream = HTMLInputStream("'", encoding='ascii')
self.assertEquals(stream.charEncoding[0], 'ascii')
self.assertEquals(stream.char(), "'")
def test_char_null(self):
stream = HTMLInputStream("\x00")
self.assertEquals(stream.char(), u'\ufffd')
def test_char_utf8(self):
stream = HTMLInputStream(u'\u2018'.encode('utf-8'), encoding='utf-8')
self.assertEquals(stream.charEncoding[0], 'utf-8')
self.assertEquals(stream.char(), u'\u2018')
def test_char_win1252(self):
stream = HTMLInputStream(u"\xa9\xf1\u2019".encode('windows-1252'))
self.assertEquals(stream.charEncoding[0], 'windows-1252')
self.assertEquals(stream.char(), u"\xa9")
self.assertEquals(stream.char(), u"\xf1")
self.assertEquals(stream.char(), u"\u2019")
def test_bom(self):
stream = HTMLInputStream(codecs.BOM_UTF8 + "'")
self.assertEquals(stream.charEncoding[0], 'utf-8')
self.assertEquals(stream.char(), "'")
def test_utf_16(self):
stream = HTMLInputStream((' '*1025).encode('utf-16'))
self.assert_(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
self.assertEquals(len(stream.charsUntil(' ', True)), 1025)
def test_newlines(self):
stream = HTMLInputStreamShortChunk(codecs.BOM_UTF8 + "a\nbb\r\nccc\rddddxe")
self.assertEquals(stream.position(), (1, 0))
self.assertEquals(stream.charsUntil('c'), u"a\nbb\n")
self.assertEquals(stream.position(), (3, 0))
self.assertEquals(stream.charsUntil('x'), u"ccc\ndddd")
self.assertEquals(stream.position(), (4, 4))
self.assertEquals(stream.charsUntil('e'), u"x")
self.assertEquals(stream.position(), (4, 5))
def test_newlines2(self):
size = HTMLInputStream._defaultChunkSize
stream = HTMLInputStream("\r" * size + "\n")
self.assertEquals(stream.charsUntil('x'), "\n" * size)
def test_position(self):
stream = HTMLInputStreamShortChunk(codecs.BOM_UTF8 + "a\nbb\nccc\nddde\nf\ngh")
self.assertEquals(stream.position(), (1, 0))
self.assertEquals(stream.charsUntil('c'), u"a\nbb\n")
self.assertEquals(stream.position(), (3, 0))
stream.unget(u"\n")
self.assertEquals(stream.position(), (2, 2))
self.assertEquals(stream.charsUntil('c'), u"\n")
self.assertEquals(stream.position(), (3, 0))
stream.unget(u"\n")
self.assertEquals(stream.position(), (2, 2))
self.assertEquals(stream.char(), u"\n")
self.assertEquals(stream.position(), (3, 0))
self.assertEquals(stream.charsUntil('e'), u"ccc\nddd")
self.assertEquals(stream.position(), (4, 3))
self.assertEquals(stream.charsUntil('h'), u"e\nf\ng")
self.assertEquals(stream.position(), (6, 1))
def test_position2(self):
stream = HTMLInputStreamShortChunk("abc\nd")
self.assertEquals(stream.position(), (1, 0))
self.assertEquals(stream.char(), u"a")
self.assertEquals(stream.position(), (1, 1))
self.assertEquals(stream.char(), u"b")
self.assertEquals(stream.position(), (1, 2))
self.assertEquals(stream.char(), u"c")
self.assertEquals(stream.position(), (1, 3))
self.assertEquals(stream.char(), u"\n")
self.assertEquals(stream.position(), (2, 0))
self.assertEquals(stream.char(), u"d")
self.assertEquals(stream.position(), (2, 1))
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == '__main__':
main()

View File

@@ -1,193 +0,0 @@
import sys
import os
import unittest
import cStringIO
import warnings
import re
try:
import json
except ImportError:
import simplejson as json
from support import html5lib_test_files
from html5lib.tokenizer import HTMLTokenizer
from html5lib import constants
class TokenizerTestParser(object):
def __init__(self, initialState, lastStartTag=None):
self.tokenizer = HTMLTokenizer
self._state = initialState
self._lastStartTag = lastStartTag
def parse(self, stream, encoding=None, innerHTML=False):
tokenizer = self.tokenizer(stream, encoding)
self.outputTokens = []
tokenizer.state = getattr(tokenizer, self._state)
if self._lastStartTag is not None:
tokenizer.currentToken = {"type": "startTag",
"name":self._lastStartTag}
types = dict((v,k) for k,v in constants.tokenTypes.iteritems())
for token in tokenizer:
getattr(self, 'process%s' % types[token["type"]])(token)
return self.outputTokens
def processDoctype(self, token):
self.outputTokens.append([u"DOCTYPE", token["name"], token["publicId"],
token["systemId"], token["correct"]])
def processStartTag(self, token):
self.outputTokens.append([u"StartTag", token["name"],
dict(token["data"][::-1]), token["selfClosing"]])
def processEmptyTag(self, token):
if token["name"] not in constants.voidElements:
self.outputTokens.append(u"ParseError")
self.outputTokens.append([u"StartTag", token["name"], dict(token["data"][::-1])])
def processEndTag(self, token):
self.outputTokens.append([u"EndTag", token["name"],
token["selfClosing"]])
def processComment(self, token):
self.outputTokens.append([u"Comment", token["data"]])
def processSpaceCharacters(self, token):
self.outputTokens.append([u"Character", token["data"]])
self.processSpaceCharacters = self.processCharacters
def processCharacters(self, token):
self.outputTokens.append([u"Character", token["data"]])
def processEOF(self, token):
pass
def processParseError(self, token):
self.outputTokens.append([u"ParseError", token["data"]])
def concatenateCharacterTokens(tokens):
outputTokens = []
for token in tokens:
if not "ParseError" in token and token[0] == "Character":
if (outputTokens and not "ParseError" in outputTokens[-1] and
outputTokens[-1][0] == "Character"):
outputTokens[-1][1] += token[1]
else:
outputTokens.append(token)
else:
outputTokens.append(token)
return outputTokens
def normalizeTokens(tokens):
# TODO: convert tests to reflect arrays
for i, token in enumerate(tokens):
if token[0] == u'ParseError':
tokens[i] = token[0]
return tokens
def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
ignoreErrors=False):
"""Test whether the test has passed or failed
If the ignoreErrorOrder flag is set to true we don't test the relative
positions of parse errors and non parse errors
"""
checkSelfClosing= False
for token in expectedTokens:
if (token[0] == "StartTag" and len(token) == 4
or token[0] == "EndTag" and len(token) == 3):
checkSelfClosing = True
break
if not checkSelfClosing:
for token in receivedTokens:
if token[0] == "StartTag" or token[0] == "EndTag":
token.pop()
if not ignoreErrorOrder and not ignoreErrors:
return expectedTokens == receivedTokens
else:
#Sort the tokens into two groups; non-parse errors and parse errors
tokens = {"expected":[[],[]], "received":[[],[]]}
for tokenType, tokenList in zip(tokens.keys(),
(expectedTokens, receivedTokens)):
for token in tokenList:
if token != "ParseError":
tokens[tokenType][0].append(token)
else:
if not ignoreErrors:
tokens[tokenType][1].append(token)
return tokens["expected"] == tokens["received"]
def unescape_test(test):
def decode(inp):
return inp.decode("unicode-escape")
test["input"] = decode(test["input"])
for token in test["output"]:
if token == "ParseError":
continue
else:
token[1] = decode(token[1])
if len(token) > 2:
for key, value in token[2]:
del token[2][key]
token[2][decode(key)] = decode(value)
return test
def runTokenizerTest(test):
#XXX - move this out into the setup function
#concatenate all consecutive character tokens into a single token
if 'doubleEscaped' in test:
test = unescape_test(test)
expected = concatenateCharacterTokens(test['output'])
if 'lastStartTag' not in test:
test['lastStartTag'] = None
outBuffer = cStringIO.StringIO()
stdout = sys.stdout
sys.stdout = outBuffer
parser = TokenizerTestParser(test['initialState'],
test['lastStartTag'])
tokens = parser.parse(test['input'])
tokens = concatenateCharacterTokens(tokens)
received = normalizeTokens(tokens)
errorMsg = u"\n".join(["\n\nInitial state:",
test['initialState'] ,
"\nInput:", unicode(test['input']),
"\nExpected:", unicode(expected),
"\nreceived:", unicode(tokens)])
errorMsg = errorMsg.encode("utf-8")
ignoreErrorOrder = test.get('ignoreErrorOrder', False)
assert tokensMatch(expected, received, ignoreErrorOrder), errorMsg
def _doCapitalize(match):
return match.group(1).upper()
_capitalizeRe = re.compile(r"\W+(\w)").sub
def capitalize(s):
s = s.lower()
s = _capitalizeRe(_doCapitalize, s)
return s
def test_tokenizer():
for filename in html5lib_test_files('tokenizer', '*.test'):
tests = json.load(file(filename))
testName = os.path.basename(filename).replace(".test","")
if 'tests' in tests:
for index,test in enumerate(tests['tests']):
#Skip tests with a self closing flag
skip = False
if 'initialStates' not in test:
test["initialStates"] = ["Data state"]
for initialState in test["initialStates"]:
test["initialState"] = capitalize(initialState)
yield runTokenizerTest, test

View File

@@ -1,311 +0,0 @@
import os
import sys
import StringIO
import unittest
import warnings
warnings.simplefilter("error")
from support import html5lib_test_files, TestData, convertExpected
from html5lib import html5parser, treewalkers, treebuilders, constants
from html5lib.filters.lint import Filter as LintFilter, LintError
def PullDOMAdapter(node):
from xml.dom import Node
from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS
if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
for childNode in node.childNodes:
for event in PullDOMAdapter(childNode):
yield event
elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM")
elif node.nodeType == Node.COMMENT_NODE:
yield COMMENT, node
elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
yield CHARACTERS, node
elif node.nodeType == Node.ELEMENT_NODE:
yield START_ELEMENT, node
for childNode in node.childNodes:
for event in PullDOMAdapter(childNode):
yield event
yield END_ELEMENT, node
else:
raise NotImplementedError("Node type not supported: " + str(node.nodeType))
treeTypes = {
"simpletree": {"builder": treebuilders.getTreeBuilder("simpletree"),
"walker": treewalkers.getTreeWalker("simpletree")},
"DOM": {"builder": treebuilders.getTreeBuilder("dom"),
"walker": treewalkers.getTreeWalker("dom")},
"PullDOM": {"builder": treebuilders.getTreeBuilder("dom"),
"adapter": PullDOMAdapter,
"walker": treewalkers.getTreeWalker("pulldom")},
}
#Try whatever etree implementations are available from a list that are
#"supposed" to work
try:
import xml.etree.ElementTree as ElementTree
treeTypes['ElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
except ImportError:
try:
import elementtree.ElementTree as ElementTree
treeTypes['ElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
except ImportError:
pass
try:
import xml.etree.cElementTree as ElementTree
treeTypes['cElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
except ImportError:
try:
import cElementTree as ElementTree
treeTypes['cElementTree'] = \
{"builder": treebuilders.getTreeBuilder("etree", ElementTree),
"walker": treewalkers.getTreeWalker("etree", ElementTree)}
except ImportError:
pass
try:
import lxml.etree as ElementTree
# treeTypes['lxml_as_etree'] = \
# {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
# "walker": treewalkers.getTreeWalker("etree", ElementTree)}
treeTypes['lxml_native'] = \
{"builder": treebuilders.getTreeBuilder("lxml"),
"walker": treewalkers.getTreeWalker("lxml")}
except ImportError:
pass
try:
import BeautifulSoup
treeTypes["beautifulsoup"] = \
{"builder": treebuilders.getTreeBuilder("beautifulsoup"),
"walker": treewalkers.getTreeWalker("beautifulsoup")}
except ImportError:
pass
#Try whatever etree implementations are available from a list that are
#"supposed" to work
try:
import pxdom
treeTypes['pxdom'] = \
{"builder": treebuilders.getTreeBuilder("dom", pxdom),
"walker": treewalkers.getTreeWalker("dom")}
except ImportError:
pass
try:
from genshi.core import QName, Attrs
from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
def GenshiAdapter(tree):
text = None
for token in treewalkers.getTreeWalker("simpletree")(tree):
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
if text is None:
text = token["data"]
else:
text += token["data"]
elif text is not None:
yield TEXT, text, (None, -1, -1)
text = None
if type in ("StartTag", "EmptyTag"):
if token["namespace"]:
name = u"{%s}%s" % (token["namespace"], token["name"])
else:
name = token["name"]
yield (START,
(QName(name),
Attrs([(QName(attr),value) for attr,value in token["data"]])),
(None, -1, -1))
if type == "EmptyTag":
type = "EndTag"
if type == "EndTag":
yield END, QName(token["name"]), (None, -1, -1)
elif type == "Comment":
yield COMMENT, token["data"], (None, -1, -1)
elif type == "Doctype":
yield DOCTYPE, (token["name"], token["publicId"],
token["systemId"]), (None, -1, -1)
else:
pass # FIXME: What to do?
if text is not None:
yield TEXT, text, (None, -1, -1)
#treeTypes["genshi"] = \
# {"builder": treebuilders.getTreeBuilder("simpletree"),
# "adapter": GenshiAdapter,
# "walker": treewalkers.getTreeWalker("genshi")}
except ImportError:
pass
def concatenateCharacterTokens(tokens):
charactersToken = None
for token in tokens:
type = token["type"]
if type in ("Characters", "SpaceCharacters"):
if charactersToken is None:
charactersToken = {"type": "Characters", "data": token["data"]}
else:
charactersToken["data"] += token["data"]
else:
if charactersToken is not None:
yield charactersToken
charactersToken = None
yield token
if charactersToken is not None:
yield charactersToken
def convertTokens(tokens):
output = []
indent = 0
for token in concatenateCharacterTokens(tokens):
type = token["type"]
if type in ("StartTag", "EmptyTag"):
if (token["namespace"] and
token["namespace"] != constants.namespaces["html"]):
if token["namespace"] in constants.prefixes:
name = constants.prefixes[token["namespace"]]
else:
name = token["namespace"]
name += u" " + token["name"]
else:
name = token["name"]
output.append(u"%s<%s>" % (" "*indent, name))
indent += 2
attrs = token["data"]
if attrs:
#TODO: Remove this if statement, attrs should always exist
for (namespace,name),value in sorted(attrs.items()):
if namespace:
if namespace in constants.prefixes:
outputname = constants.prefixes[namespace]
else:
outputname = namespace
outputname += u" " + name
else:
outputname = name
output.append(u"%s%s=\"%s\"" % (" "*indent, outputname, value))
if type == "EmptyTag":
indent -= 2
elif type == "EndTag":
indent -= 2
elif type == "Comment":
output.append("%s<!-- %s -->" % (" "*indent, token["data"]))
elif type == "Doctype":
if token["name"]:
if token["publicId"]:
output.append("""%s<!DOCTYPE %s "%s" "%s">"""%
(" "*indent, token["name"],
token["publicId"],
token["systemId"] and token["systemId"] or ""))
elif token["systemId"]:
output.append("""%s<!DOCTYPE %s "" "%s">"""%
(" "*indent, token["name"],
token["systemId"]))
else:
output.append("%s<!DOCTYPE %s>"%(" "*indent,
token["name"]))
else:
output.append("%s<!DOCTYPE >" % (" "*indent,))
elif type in ("Characters", "SpaceCharacters"):
output.append("%s\"%s\"" % (" "*indent, token["data"]))
else:
pass # TODO: what to do with errors?
return u"\n".join(output)
import re
attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+",re.M)
def sortattrs(x):
lines = x.group(0).split("\n")
lines.sort()
return "\n".join(lines)
class TokenTestCase(unittest.TestCase):
def test_all_tokens(self):
expected = [
{'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'html'},
{'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'head'},
{'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'head'},
{'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'body'},
{'data': u'a', 'type': 'Characters'},
{'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'div'},
{'data': u'b', 'type': 'Characters'},
{'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'div'},
{'data': u'c', 'type': 'Characters'},
{'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'body'},
{'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'html'}
]
for treeName, treeCls in treeTypes.iteritems():
p = html5parser.HTMLParser(tree = treeCls["builder"])
document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
document = treeCls.get("adapter", lambda x: x)(document)
output = treeCls["walker"](document)
for expectedToken, outputToken in zip(expected, output):
self.assertEquals(expectedToken, outputToken)
def run_test(innerHTML, input, expected, errors, treeClass):
try:
p = html5parser.HTMLParser(tree = treeClass["builder"])
if innerHTML:
document = p.parseFragment(StringIO.StringIO(input), innerHTML)
else:
document = p.parse(StringIO.StringIO(input))
except constants.DataLossWarning:
#Ignore testcases we know we don't pass
return
document = treeClass.get("adapter", lambda x: x)(document)
try:
output = convertTokens(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
expected = attrlist.sub(sortattrs, convertExpected(expected))
assert expected == output, "\n".join([
"", "Input:", input,
"", "Expected:", expected,
"", "Received:", output
])
except NotImplementedError:
pass # Amnesty for those that confess...
def test_treewalker():
sys.stdout.write('Testing tree walkers '+ " ".join(treeTypes.keys()) + "\n")
for treeName, treeCls in treeTypes.iteritems():
files = html5lib_test_files('tree-construction')
for filename in files:
testName = os.path.basename(filename).replace(".dat","")
tests = TestData(filename, "data")
for index, test in enumerate(tests):
(input, errors,
innerHTML, expected) = [test[key] for key in ("data", "errors",
"document-fragment",
"document")]
errors = errors.split("\n")
yield run_test, innerHTML, input, expected, errors, treeCls

View File

@@ -1,123 +0,0 @@
import unittest
from html5lib.filters.whitespace import Filter
from html5lib.constants import spaceCharacters
spaceCharacters = u"".join(spaceCharacters)
class TestCase(unittest.TestCase):
def runTest(self, input, expected):
output = list(Filter(input))
errorMsg = "\n".join(["\n\nInput:", str(input),
"\nExpected:", str(expected),
"\nReceived:", str(output)])
self.assertEquals(output, expected, errorMsg)
def runTestUnmodifiedOutput(self, input):
self.runTest(input, input)
def testPhrasingElements(self):
self.runTestUnmodifiedOutput(
[{"type": u"Characters", "data": u"This is a " },
{"type": u"StartTag", "name": u"span", "data": [] },
{"type": u"Characters", "data": u"phrase" },
{"type": u"EndTag", "name": u"span", "data": []},
{"type": u"SpaceCharacters", "data": u" " },
{"type": u"Characters", "data": u"with" },
{"type": u"SpaceCharacters", "data": u" " },
{"type": u"StartTag", "name": u"em", "data": [] },
{"type": u"Characters", "data": u"emphasised text" },
{"type": u"EndTag", "name": u"em", "data": []},
{"type": u"Characters", "data": u" and an " },
{"type": u"StartTag", "name": u"img", "data": [[u"alt", u"image"]] },
{"type": u"Characters", "data": u"." }])
def testLeadingWhitespace(self):
self.runTest(
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"SpaceCharacters", "data": spaceCharacters},
{"type": u"Characters", "data": u"foo"},
{"type": u"EndTag", "name": u"p", "data": []}],
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"SpaceCharacters", "data": u" "},
{"type": u"Characters", "data": u"foo"},
{"type": u"EndTag", "name": u"p", "data": []}])
def testLeadingWhitespaceAsCharacters(self):
self.runTest(
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": spaceCharacters + u"foo"},
{"type": u"EndTag", "name": u"p", "data": []}],
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u" foo"},
{"type": u"EndTag", "name": u"p", "data": []}])
def testTrailingWhitespace(self):
self.runTest(
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo"},
{"type": u"SpaceCharacters", "data": spaceCharacters},
{"type": u"EndTag", "name": u"p", "data": []}],
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo"},
{"type": u"SpaceCharacters", "data": u" "},
{"type": u"EndTag", "name": u"p", "data": []}])
def testTrailingWhitespaceAsCharacters(self):
self.runTest(
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo" + spaceCharacters},
{"type": u"EndTag", "name": u"p", "data": []}],
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo "},
{"type": u"EndTag", "name": u"p", "data": []}])
def testWhitespace(self):
self.runTest(
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo" + spaceCharacters + "bar"},
{"type": u"EndTag", "name": u"p", "data": []}],
[{"type": u"StartTag", "name": u"p", "data": []},
{"type": u"Characters", "data": u"foo bar"},
{"type": u"EndTag", "name": u"p", "data": []}])
def testLeadingWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": u"StartTag", "name": u"pre", "data": []},
{"type": u"SpaceCharacters", "data": spaceCharacters},
{"type": u"Characters", "data": u"foo"},
{"type": u"EndTag", "name": u"pre", "data": []}])
def testLeadingWhitespaceAsCharactersInPre(self):
self.runTestUnmodifiedOutput(
[{"type": u"StartTag", "name": u"pre", "data": []},
{"type": u"Characters", "data": spaceCharacters + u"foo"},
{"type": u"EndTag", "name": u"pre", "data": []}])
def testTrailingWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": u"StartTag", "name": u"pre", "data": []},
{"type": u"Characters", "data": u"foo"},
{"type": u"SpaceCharacters", "data": spaceCharacters},
{"type": u"EndTag", "name": u"pre", "data": []}])
def testTrailingWhitespaceAsCharactersInPre(self):
self.runTestUnmodifiedOutput(
[{"type": u"StartTag", "name": u"pre", "data": []},
{"type": u"Characters", "data": u"foo" + spaceCharacters},
{"type": u"EndTag", "name": u"pre", "data": []}])
def testWhitespaceInPre(self):
self.runTestUnmodifiedOutput(
[{"type": u"StartTag", "name": u"pre", "data": []},
{"type": u"Characters", "data": u"foo" + spaceCharacters + "bar"},
{"type": u"EndTag", "name": u"pre", "data": []}])
def buildTestSuite():
return unittest.defaultTestLoader.loadTestsFromName(__name__)
def main():
buildTestSuite()
unittest.main()
if __name__ == "__main__":
main()

View File

@@ -1,10 +0,0 @@
#data
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
<!--京-->
<title>Yahoo! JAPAN</title>
<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
<style type="text/css" media="all">
#encoding
euc_jp

File diff suppressed because one or more lines are too long

View File

@@ -1,115 +0,0 @@
#data
<meta
#encoding
windows-1252
#data
<
#encoding
windows-1252
#data
<!
#encoding
windows-1252
#data
<meta charset = "
#encoding
windows-1252
#data
<meta charset=euc_jp
#encoding
windows-1252
#data
<meta <meta charset='euc_jp'>
#encoding
euc_jp
#data
<meta charset = 'euc_jp'>
#encoding
euc_jp
#data
<!-- -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
#encoding
utf-8
#data
<!-- -->
<meta http-equiv="Content-Type" content="text/html; charset=utf
#encoding
windows-1252
#data
<meta http-equiv="Content-Type<meta charset="utf-8">
#encoding
windows-1252
#data
<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
#encoding
utf-8
#data
<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
#encoding
windows-1252
#data
<meta
#encoding
windows-1252
#data
<meta charset =
#encoding
windows-1252
#data
<meta charset= utf-8
>
#encoding
utf-8
#data
<meta content = "text/html;
#encoding
windows-1252
#data
<meta charset="UTF-16">
#encoding
utf-8
#data
<meta charset="UTF-16LE">
#encoding
utf-8
#data
<meta charset="UTF-16BE">
#encoding
utf-8
#data
<html a=ñ>
<meta charset="utf-8">
#encoding
utf-8
#data
<html ñ>
<meta charset="utf-8">
#encoding
utf-8
#data
<html>ñ
<meta charset="utf-8">
#encoding
utf-8

View File

@@ -1,501 +0,0 @@
[
{
"name": "IE_Comments",
"input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
"output": ""
},
{
"name": "IE_Comments_2",
"input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
"output": "&lt;script&gt;alert('XSS');&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "allow_colons_in_path_component",
"input": "<a href=\"./this:that\">foo</a>",
"output": "<a href='./this:that'>foo</a>"
},
{
"name": "background_attribute",
"input": "<div background=\"javascript:alert('XSS')\"></div>",
"output": "<div/>",
"xhtml": "<div></div>",
"rexml": "<div></div>"
},
{
"name": "bgsound",
"input": "<bgsound src=\"javascript:alert('XSS');\" />",
"output": "&lt;bgsound src=\"javascript:alert('XSS');\"/&gt;",
"rexml": "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
},
{
"name": "div_background_image_unicode_encoded",
"input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
"output": "<div style=''>foo</div>"
},
{
"name": "div_expression",
"input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
"output": "<div style=''>foo</div>"
},
{
"name": "double_open_angle_brackets",
"input": "<img src=http://ha.ckers.org/scriptlet.html <",
"output": "<img src='http://ha.ckers.org/scriptlet.html'>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "double_open_angle_brackets_2",
"input": "<script src=http://ha.ckers.org/scriptlet.html <",
"output": "&lt;script src=\"http://ha.ckers.org/scriptlet.html\" &lt;=\"\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "grave_accents",
"input": "<img src=`javascript:alert('XSS')` />",
"output": "<img/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "img_dynsrc_lowsrc",
"input": "<img dynsrc=\"javascript:alert('XSS')\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "img_vbscript",
"input": "<img src='vbscript:msgbox(\"XSS\")' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "input_image",
"input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
"output": "<input type='image'/>",
"rexml": "<input type='image' />"
},
{
"name": "link_stylesheets",
"input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
"output": "&lt;link rel=\"stylesheet\" href=\"javascript:alert('XSS');\"/&gt;",
"rexml": "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"/&gt;"
},
{
"name": "link_stylesheets_2",
"input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
"output": "&lt;link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\"/&gt;",
"rexml": "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"/&gt;"
},
{
"name": "list_style_image",
"input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
"output": "<li style=''>foo</li>"
},
{
"name": "no_closing_script_tags",
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit",
"input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
"output": "&lt;script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_2",
"input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
"output": "<a>foo</a>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_3",
"input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
"output": "<img src='http://ha.ckers.org/xss.js'/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_II",
"input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
"output": "<a>foo</a>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "non_alpha_non_digit_III",
"input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
"output": "<a>foo</a>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "platypus",
"input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
"output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
},
{
"name": "protocol_resolution_in_script_tag",
"input": "<script src=//ha.ckers.org/.j></script>",
"output": "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_allow_anchors",
"input": "<a href='foo' onclick='bar'><script>baz</script></a>",
"output": "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
},
{
"name": "should_allow_image_alt_attribute",
"input": "<img alt='foo' onclick='bar' />",
"output": "<img alt='foo'/>",
"rexml": "<img alt='foo' />"
},
{
"name": "should_allow_image_height_attribute",
"input": "<img height='foo' onclick='bar' />",
"output": "<img height='foo'/>",
"rexml": "<img height='foo' />"
},
{
"name": "should_allow_image_src_attribute",
"input": "<img src='foo' onclick='bar' />",
"output": "<img src='foo'/>",
"rexml": "<img src='foo' />"
},
{
"name": "should_allow_image_width_attribute",
"input": "<img width='foo' onclick='bar' />",
"output": "<img width='foo'/>",
"rexml": "<img width='foo' />"
},
{
"name": "should_handle_blank_text",
"input": "",
"output": ""
},
{
"name": "should_handle_malformed_image_tags",
"input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
"output": "<img/>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_handle_non_html",
"input": "abc",
"output": "abc"
},
{
"name": "should_not_fall_for_ridiculous_hack",
"input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_0",
"input": "<img src=\"javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_1",
"input": "<img src=javascript:alert('XSS') />",
"output": "<img/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_not_fall_for_xss_image_hack_10",
"input": "<img src=\"jav&#x0A;ascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_11",
"input": "<img src=\"jav&#x0D;ascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_12",
"input": "<img src=\" &#14; javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_13",
"input": "<img src=\"&#x20;javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_14",
"input": "<img src=\"&#xA0;javascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_2",
"input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_3",
"input": "<img src='javascript:alert(&quot;XSS&quot;)' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_4",
"input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_5",
"input": "<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_6",
"input": "<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_7",
"input": "<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_8",
"input": "<img src=\"jav\tascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_not_fall_for_xss_image_hack_9",
"input": "<img src=\"jav&#x09;ascript:alert('XSS');\" />",
"output": "<img/>",
"rexml": "<img />"
},
{
"name": "should_sanitize_half_open_scripts",
"input": "<img src=\"javascript:alert('XSS')\"",
"output": "<img/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_invalid_script_tag",
"input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
"output": "&lt;script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_script_tag_with_multiple_open_brackets",
"input": "<<script>alert(\"XSS\");//<</script>",
"output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
"input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
"output": "&lt;iframe src=\"http://ha.ckers.org/scriptlet.html\" &lt;=\"\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_tag_broken_up_by_null",
"input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
"output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_sanitize_unclosed_script",
"input": "<script src=http://ha.ckers.org/xss.js?<b>",
"output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;",
"rexml": "Ill-formed XHTML!"
},
{
"name": "should_strip_href_attribute_in_a_with_bad_protocols",
"input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
"output": "<a title='1'>boo</a>"
},
{
"name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
"input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
"output": "<a title='1'>boo</a>"
},
{
"name": "should_strip_src_attribute_in_img_with_bad_protocols",
"input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
"output": "<img title='1'/>boo",
"rexml": "<img title='1' />"
},
{
"name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
"input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
"output": "<img title='1'/>boo",
"rexml": "<img title='1' />"
},
{
"name": "xml_base",
"input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
"output": "<div>foo</div>"
},
{
"name": "xul",
"input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
"output": "<p style=''>fubar</p>"
},
{
"name": "quotes_in_attributes",
"input": "<img src='foo' title='\"foo\" bar' />",
"rexml": "<img src='foo' title='\"foo\" bar' />",
"output": "<img title='&quot;foo&quot; bar' src='foo'/>"
},
{
"name": "uri_refs_in_svg_attributes",
"input": "<rect fill='url(#foo)' />",
"rexml": "<rect fill='url(#foo)'></rect>",
"xhtml": "<rect fill='url(#foo)'></rect>",
"output": "<rect fill='url(#foo)'/>"
},
{
"name": "absolute_uri_refs_in_svg_attributes",
"input": "<rect fill='url(http://bad.com/) #fff' />",
"rexml": "<rect fill=' #fff'></rect>",
"xhtml": "<rect fill=' #fff'></rect>",
"output": "<rect fill=' #fff'/>"
},
{
"name": "uri_ref_with_space_in svg_attribute",
"input": "<rect fill='url(\n#foo)' />",
"rexml": "<rect fill='url(\n#foo)'></rect>",
"xhtml": "<rect fill='url(\n#foo)'></rect>",
"output": "<rect fill='url(\n#foo)'/>"
},
{
"name": "absolute_uri_ref_with_space_in svg_attribute",
"input": "<rect fill=\"url(\nhttp://bad.com/)\" />",
"rexml": "<rect fill=' '></rect>",
"xhtml": "<rect fill=' '></rect>",
"output": "<rect fill=' '/>"
},
{
"name": "allow_html5_image_tag",
"input": "<image src='foo' />",
"rexml": "&lt;image src=\"foo\"&gt;&lt;/image&gt;",
"output": "&lt;image src=\"foo\"/&gt;"
},
{
"name": "style_attr_end_with_nothing",
"input": "<div style=\"color: blue\" />",
"output": "<div style='color: blue;'/>",
"xhtml": "<div style='color: blue;'></div>",
"rexml": "<div style='color: blue;'></div>"
},
{
"name": "style_attr_end_with_space",
"input": "<div style=\"color: blue \" />",
"output": "<div style='color: blue ;'/>",
"xhtml": "<div style='color: blue ;'></div>",
"rexml": "<div style='color: blue ;'></div>"
},
{
"name": "style_attr_end_with_semicolon",
"input": "<div style=\"color: blue;\" />",
"output": "<div style='color: blue;'/>",
"xhtml": "<div style='color: blue;'></div>",
"rexml": "<div style='color: blue;'></div>"
},
{
"name": "style_attr_end_with_semicolon_space",
"input": "<div style=\"color: blue; \" />",
"output": "<div style='color: blue;'/>",
"xhtml": "<div style='color: blue;'></div>",
"rexml": "<div style='color: blue;'></div>"
},
{
"name": "attributes_with_embedded_quotes",
"input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
"output": "<img src='doesntexist.jpg&quot;&apos;onerror=&quot;alert(1)'/>",
"rexml": "Ill-formed XHTML!"
},
{
"name": "attributes_with_embedded_quotes_II",
"input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
"output": "<img src='notthere.jpg&quot;&quot;onerror=&quot;alert(2)'/>",
"rexml": "Ill-formed XHTML!"
}
]

View File

@@ -1,125 +0,0 @@
{"tests": [
{"description": "proper attribute value escaping",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" &quot;"}]]],
"expected": ["<span title='test \"with\" &amp;quot;'>"]
},
{"description": "proper attribute value non-quoting",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
"expected": ["<span title=foo>"],
"xhtml": ["<span title=\"foo\">"]
},
{"description": "proper attribute value non-quoting (with <)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
"expected": ["<span title=foo<bar>"],
"xhtml": ["<span title=\"foo&lt;bar\">"]
},
{"description": "proper attribute value quoting (with =)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
"expected": ["<span title=\"foo=bar\">"]
},
{"description": "proper attribute value quoting (with >)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
"expected": ["<span title=\"foo>bar\">"]
},
{"description": "proper attribute value quoting (with \")",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
"expected": ["<span title='foo\"bar'>"]
},
{"description": "proper attribute value quoting (with ')",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
"expected": ["<span title=\"foo'bar\">"]
},
{"description": "proper attribute value quoting (with both \" and ')",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
"expected": ["<span title=\"foo'bar&quot;baz\">"]
},
{"description": "proper attribute value quoting (with space)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
"expected": ["<span title=\"foo bar\">"]
},
{"description": "proper attribute value quoting (with tab)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
"expected": ["<span title=\"foo\tbar\">"]
},
{"description": "proper attribute value quoting (with LF)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
"expected": ["<span title=\"foo\nbar\">"]
},
{"description": "proper attribute value quoting (with CR)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
"expected": ["<span title=\"foo\rbar\">"]
},
{"description": "proper attribute value non-quoting (with linetab)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
"expected": ["<span title=foo\u000Bbar>"],
"xhtml": ["<span title=\"foo\u000Bbar\">"]
},
{"description": "proper attribute value quoting (with form feed)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
"expected": ["<span title=\"foo\u000Cbar\">"]
},
{"description": "void element (as EmptyTag token)",
"input": [["EmptyTag", "img", {}]],
"expected": ["<img>"],
"xhtml": ["<img />"]
},
{"description": "void element (as StartTag token)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
"expected": ["<img>"],
"xhtml": ["<img />"]
},
{"description": "doctype in error",
"input": [["Doctype", "foo"]],
"expected": ["<!DOCTYPE foo>"]
},
{"description": "character data",
"options": {"encoding":"utf-8"},
"input": [["Characters", "a<b>c&d"]],
"expected": ["a&lt;b&gt;c&amp;d"]
},
{"description": "rcdata",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
"expected": ["<script>a<b>c&d"],
"xhtml": ["<script>a&lt;b&gt;c&amp;d"]
},
{"description": "doctype",
"input": [["Doctype", "HTML"]],
"expected": ["<!DOCTYPE HTML>"]
},
{"description": "HTML 4.01 DOCTYPE",
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
},
{"description": "HTML 4.01 DOCTYPE without system identifer",
"input": [["Doctype", "HTML", "-//W3C//DTD HTML 4.01//EN"]],
"expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
},
{"description": "IBM DOCTYPE without public identifer",
"input": [["Doctype", "html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
"expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
}
]}

View File

@@ -1,66 +0,0 @@
{"tests": [
{"description": "no encoding",
"options": {"inject_meta_charset": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""],
"xhtml": ["<head></head>"]
},
{"description": "empytag head",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8>"],
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/title",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><title>foo</title>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
},
{"description": "head w/meta-charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8>"],
"xhtml": ["<head><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/ two meta-charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
},
{"description": "head w/robots",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
"xhtml": ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
},
{"description": "head w/robots & charset",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
},
{"description": "head w/ charset in http-equiv content-type",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
"xhtml": ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
},
{"description": "head w/robots & charset in http-equiv content-type",
"options": {"inject_meta_charset": true, "encoding":"utf-8"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
"xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
}
]}

View File

@@ -1,965 +0,0 @@
{"tests": [
{"description": "html start-tag followed by text, with attributes",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", [{"namespace": null, "name": "lang", "value": "en"}]], ["Characters", "foo"]],
"expected": ["<html lang=en>foo"]
},
{"description": "html start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Comment", "foo"]],
"expected": ["<html><!--foo-->"]
},
{"description": "html start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", " foo"]],
"expected": ["<html> foo"]
},
{"description": "html start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "html start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "html start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "html start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}]],
"expected": [""]
},
{"description": "html end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Comment", "foo"]],
"expected": ["</html><!--foo-->"]
},
{"description": "html end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", " foo"]],
"expected": ["</html> foo"]
},
{"description": "html end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "html end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "html end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "html end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"]],
"expected": [""]
},
{"description": "head start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Comment", "foo"]],
"expected": ["<head><!--foo-->"]
},
{"description": "head start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", " foo"]],
"expected": ["<head> foo"]
},
{"description": "head start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", "foo"]],
"expected": ["<head>foo"]
},
{"description": "head start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head start-tag followed by end-tag (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<head></foo>", "</foo>"]
},
{"description": "empty head element",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""]
},
{"description": "head start-tag followed by empty-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}]],
"expected": ["<head>", ""]
},
{"description": "head end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Comment", "foo"]],
"expected": ["</head><!--foo-->"]
},
{"description": "head end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", " foo"]],
"expected": ["</head> foo"]
},
{"description": "head end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "head end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "head end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "head end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
"expected": [""]
},
{"description": "body start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Comment", "foo"]],
"expected": ["<body><!--foo-->"]
},
{"description": "body start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", " foo"]],
"expected": ["<body> foo"]
},
{"description": "body start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "body start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "body start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "body start-tag at EOF (shouldn't ever happen?!)",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}]],
"expected": [""]
},
{"description": "body end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Comment", "foo"]],
"expected": ["</body><!--foo-->"]
},
{"description": "body end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", " foo"]],
"expected": ["</body> foo"]
},
{"description": "body end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "body end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "body end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "body end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"]],
"expected": [""]
},
{"description": "li end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Comment", "foo"]],
"expected": ["</li><!--foo-->"]
},
{"description": "li end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", " foo"]],
"expected": ["</li> foo"]
},
{"description": "li end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", "foo"]],
"expected": ["</li>foo"]
},
{"description": "li end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</li><foo>"]
},
{"description": "li end-tag followed by li start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "li", {}]],
"expected": ["<li>"]
},
{"description": "li end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "li end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"]],
"expected": [""]
},
{"description": "dt end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Comment", "foo"]],
"expected": ["</dt><!--foo-->"]
},
{"description": "dt end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", " foo"]],
"expected": ["</dt> foo"]
},
{"description": "dt end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", "foo"]],
"expected": ["</dt>foo"]
},
{"description": "dt end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</dt><foo>"]
},
{"description": "dt end-tag followed by dt start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
"expected": ["<dt>"]
},
{"description": "dt end-tag followed by dd start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
"expected": ["<dd>"]
},
{"description": "dt end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</dt></foo>"]
},
{"description": "dt end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"]],
"expected": ["</dt>"]
},
{"description": "dd end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Comment", "foo"]],
"expected": ["</dd><!--foo-->"]
},
{"description": "dd end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", " foo"]],
"expected": ["</dd> foo"]
},
{"description": "dd end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", "foo"]],
"expected": ["</dd>foo"]
},
{"description": "dd end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</dd><foo>"]
},
{"description": "dd end-tag followed by dd start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
"expected": ["<dd>"]
},
{"description": "dd end-tag followed by dt start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
"expected": ["<dt>"]
},
{"description": "dd end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "dd end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"]],
"expected": [""]
},
{"description": "p end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Comment", "foo"]],
"expected": ["</p><!--foo-->"]
},
{"description": "p end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", " foo"]],
"expected": ["</p> foo"]
},
{"description": "p end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", "foo"]],
"expected": ["</p>foo"]
},
{"description": "p end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</p><foo>"]
},
{"description": "p end-tag followed by address start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "address", {}]],
"expected": ["<address>"]
},
{"description": "p end-tag followed by article start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "article", {}]],
"expected": ["<article>"]
},
{"description": "p end-tag followed by aside start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "aside", {}]],
"expected": ["<aside>"]
},
{"description": "p end-tag followed by blockquote start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "blockquote", {}]],
"expected": ["<blockquote>"]
},
{"description": "p end-tag followed by datagrid start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "datagrid", {}]],
"expected": ["<datagrid>"]
},
{"description": "p end-tag followed by dialog start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dialog", {}]],
"expected": ["<dialog>"]
},
{"description": "p end-tag followed by dir start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dir", {}]],
"expected": ["<dir>"]
},
{"description": "p end-tag followed by div start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
"expected": ["<div>"]
},
{"description": "p end-tag followed by dl start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dl", {}]],
"expected": ["<dl>"]
},
{"description": "p end-tag followed by fieldset start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "fieldset", {}]],
"expected": ["<fieldset>"]
},
{"description": "p end-tag followed by footer start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "footer", {}]],
"expected": ["<footer>"]
},
{"description": "p end-tag followed by form start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "form", {}]],
"expected": ["<form>"]
},
{"description": "p end-tag followed by h1 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h1", {}]],
"expected": ["<h1>"]
},
{"description": "p end-tag followed by h2 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h2", {}]],
"expected": ["<h2>"]
},
{"description": "p end-tag followed by h3 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h3", {}]],
"expected": ["<h3>"]
},
{"description": "p end-tag followed by h4 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h4", {}]],
"expected": ["<h4>"]
},
{"description": "p end-tag followed by h5 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h5", {}]],
"expected": ["<h5>"]
},
{"description": "p end-tag followed by h6 start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h6", {}]],
"expected": ["<h6>"]
},
{"description": "p end-tag followed by header start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "header", {}]],
"expected": ["<header>"]
},
{"description": "p end-tag followed by hr empty-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EmptyTag", "hr", {}]],
"expected": ["<hr>"]
},
{"description": "p end-tag followed by menu start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "menu", {}]],
"expected": ["<menu>"]
},
{"description": "p end-tag followed by nav start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "nav", {}]],
"expected": ["<nav>"]
},
{"description": "p end-tag followed by ol start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ol", {}]],
"expected": ["<ol>"]
},
{"description": "p end-tag followed by p start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "p", {}]],
"expected": ["<p>"]
},
{"description": "p end-tag followed by pre start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}]],
"expected": ["<pre>"]
},
{"description": "p end-tag followed by section start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "section", {}]],
"expected": ["<section>"]
},
{"description": "p end-tag followed by table start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "table", {}]],
"expected": ["<table>"]
},
{"description": "p end-tag followed by ul start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ul", {}]],
"expected": ["<ul>"]
},
{"description": "p end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "p end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"]],
"expected": [""]
},
{"description": "optgroup end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Comment", "foo"]],
"expected": ["</optgroup><!--foo-->"]
},
{"description": "optgroup end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", " foo"]],
"expected": ["</optgroup> foo"]
},
{"description": "optgroup end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", "foo"]],
"expected": ["</optgroup>foo"]
},
{"description": "optgroup end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</optgroup><foo>"]
},
{"description": "optgroup end-tag followed by optgroup start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
"expected": ["<optgroup>"]
},
{"description": "optgroup end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "optgroup end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"]],
"expected": [""]
},
{"description": "option end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Comment", "foo"]],
"expected": ["</option><!--foo-->"]
},
{"description": "option end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", " foo"]],
"expected": ["</option> foo"]
},
{"description": "option end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", "foo"]],
"expected": ["</option>foo"]
},
{"description": "option end-tag followed by optgroup start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
"expected": ["<optgroup>"]
},
{"description": "option end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</option><foo>"]
},
{"description": "option end-tag followed by option start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "option", {}]],
"expected": ["<option>"]
},
{"description": "option end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "option end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"]],
"expected": [""]
},
{"description": "colgroup start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Comment", "foo"]],
"expected": ["<colgroup><!--foo-->"]
},
{"description": "colgroup start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", " foo"]],
"expected": ["<colgroup> foo"]
},
{"description": "colgroup start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", "foo"]],
"expected": ["<colgroup>foo"]
},
{"description": "colgroup start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<colgroup><foo>"]
},
{"description": "first colgroup in a table with a col child",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EmptyTag", "col", {}]],
"expected": ["<table><col>"]
},
{"description": "colgroup with a col child, following another colgroup",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "col", {}]],
"expected": ["</colgroup><col>", "<colgroup><col>"]
},
{"description": "colgroup start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<colgroup></foo>"]
},
{"description": "colgroup start-tag at EOF",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}]],
"expected": ["<colgroup>"]
},
{"description": "colgroup end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Comment", "foo"]],
"expected": ["</colgroup><!--foo-->"]
},
{"description": "colgroup end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", " foo"]],
"expected": ["</colgroup> foo"]
},
{"description": "colgroup end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", "foo"]],
"expected": ["foo"]
},
{"description": "colgroup end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<foo>"]
},
{"description": "colgroup end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "colgroup end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"]],
"expected": [""]
},
{"description": "thead end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Comment", "foo"]],
"expected": ["</thead><!--foo-->"]
},
{"description": "thead end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", " foo"]],
"expected": ["</thead> foo"]
},
{"description": "thead end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", "foo"]],
"expected": ["</thead>foo"]
},
{"description": "thead end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</thead><foo>"]
},
{"description": "thead end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>"]
},
{"description": "thead end-tag followed by tfoot start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
"expected": ["<tfoot>"]
},
{"description": "thead end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</thead></foo>"]
},
{"description": "thead end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"]],
"expected": ["</thead>"]
},
{"description": "tbody start-tag followed by comment",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Comment", "foo"]],
"expected": ["<tbody><!--foo-->"]
},
{"description": "tbody start-tag followed by space character",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", " foo"]],
"expected": ["<tbody> foo"]
},
{"description": "tbody start-tag followed by text",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", "foo"]],
"expected": ["<tbody>foo"]
},
{"description": "tbody start-tag followed by start-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["<tbody><foo>"]
},
{"description": "first tbody in a table with a tr child",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<table><tr>"]
},
{"description": "tbody with a tr child, following another tbody",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</tbody><tr>"]
},
{"description": "tbody with a tr child, following a thead",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</thead><tr>"]
},
{"description": "tbody with a tr child, following a tfoot",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tbody><tr>", "</tfoot><tr>"]
},
{"description": "tbody start-tag followed by end-tag",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["<tbody></foo>"]
},
{"description": "tbody start-tag at EOF",
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>"]
},
{"description": "tbody end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Comment", "foo"]],
"expected": ["</tbody><!--foo-->"]
},
{"description": "tbody end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", " foo"]],
"expected": ["</tbody> foo"]
},
{"description": "tbody end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", "foo"]],
"expected": ["</tbody>foo"]
},
{"description": "tbody end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tbody><foo>"]
},
{"description": "tbody end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>", "</tbody>"]
},
{"description": "tbody end-tag followed by tfoot start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
"expected": ["<tfoot>"]
},
{"description": "tbody end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tbody end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"]],
"expected": [""]
},
{"description": "tfoot end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Comment", "foo"]],
"expected": ["</tfoot><!--foo-->"]
},
{"description": "tfoot end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", " foo"]],
"expected": ["</tfoot> foo"]
},
{"description": "tfoot end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", "foo"]],
"expected": ["</tfoot>foo"]
},
{"description": "tfoot end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tfoot><foo>"]
},
{"description": "tfoot end-tag followed by tbody start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
"expected": ["<tbody>", "</tfoot>"]
},
{"description": "tfoot end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tfoot end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"]],
"expected": [""]
},
{"description": "tr end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Comment", "foo"]],
"expected": ["</tr><!--foo-->"]
},
{"description": "tr end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", " foo"]],
"expected": ["</tr> foo"]
},
{"description": "tr end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", "foo"]],
"expected": ["</tr>foo"]
},
{"description": "tr end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</tr><foo>"]
},
{"description": "tr end-tag followed by tr start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
"expected": ["<tr>", "</tr>"]
},
{"description": "tr end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "tr end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"]],
"expected": [""]
},
{"description": "td end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Comment", "foo"]],
"expected": ["</td><!--foo-->"]
},
{"description": "td end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", " foo"]],
"expected": ["</td> foo"]
},
{"description": "td end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", "foo"]],
"expected": ["</td>foo"]
},
{"description": "td end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</td><foo>"]
},
{"description": "td end-tag followed by td start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
"expected": ["<td>", "</td>"]
},
{"description": "td end-tag followed by th start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
"expected": ["<th>", "</td>"]
},
{"description": "td end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "td end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"]],
"expected": [""]
},
{"description": "th end-tag followed by comment",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Comment", "foo"]],
"expected": ["</th><!--foo-->"]
},
{"description": "th end-tag followed by space character",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", " foo"]],
"expected": ["</th> foo"]
},
{"description": "th end-tag followed by text",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", "foo"]],
"expected": ["</th>foo"]
},
{"description": "th end-tag followed by start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
"expected": ["</th><foo>"]
},
{"description": "th end-tag followed by th start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
"expected": ["<th>", "</th>"]
},
{"description": "th end-tag followed by td start-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
"expected": ["<td>", "</th>"]
},
{"description": "th end-tag followed by end-tag",
"input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
"expected": ["</foo>"]
},
{"description": "th end-tag at EOF",
"input": [["EndTag", "http://www.w3.org/1999/xhtml" , "th"]],
"expected": [""]
}
]}

View File

@@ -1,60 +0,0 @@
{"tests":[
{"description": "quote_char=\"'\"",
"options": {"quote_char": "'"},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
"expected": ["<span title='test &#39;with&#39; quote_char'>"]
},
{"description": "quote_attr_values=true",
"options": {"quote_attr_values": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
"expected": ["<button disabled>"],
"xhtml": ["<button disabled=\"disabled\">"]
},
{"description": "quote_attr_values=true with irrelevant",
"options": {"quote_attr_values": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
"expected": ["<div irrelevant>"],
"xhtml": ["<div irrelevant=\"irrelevant\">"]
},
{"description": "use_trailing_solidus=true with void element",
"options": {"use_trailing_solidus": true},
"input": [["EmptyTag", "img", {}]],
"expected": ["<img />"]
},
{"description": "use_trailing_solidus=true with non-void element",
"options": {"use_trailing_solidus": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
"expected": ["<div>"]
},
{"description": "minimize_boolean_attributes=false",
"options": {"minimize_boolean_attributes": false},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
"expected": ["<div irrelevant=irrelevant>"],
"xhtml": ["<div irrelevant=\"irrelevant\">"]
},
{"description": "minimize_boolean_attributes=false with empty value",
"options": {"minimize_boolean_attributes": false},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
"expected": ["<div irrelevant=\"\">"]
},
{"description": "escape less than signs in attribute values",
"options": {"escape_lt_in_attrs": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
"expected": ["<a title=\"a&lt;b>c&amp;d\">"]
},
{"description": "rcdata",
"options": {"escape_rcdata": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
"expected": ["<script>a&lt;b&gt;c&amp;d"]
}
]}

View File

@@ -1,51 +0,0 @@
{"tests": [
{"description": "bare text with leading spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "\t\r\n\u000C foo"]],
"expected": [" foo"]
},
{"description": "bare text with trailing spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000C"]],
"expected": ["foo "]
},
{"description": "bare text with inner spaces",
"options": {"strip_whitespace": true},
"input": [["Characters", "foo \t\r\n\u000C bar"]],
"expected": ["foo bar"]
},
{"description": "text within <pre>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
"expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
},
{"description": "text within <pre>, with inner markup",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
"expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
},
{"description": "text within <textarea>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
"expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
},
{"description": "text within <script>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
"expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
},
{"description": "text within <style>",
"options": {"strip_whitespace": true},
"input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
"expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
}
]}

View File

@@ -1,43 +0,0 @@
[
{"type": "text/html", "input": ""},
{"type": "text/html", "input": "<!---->"},
{"type": "text/html", "input": "<!--asdfaslkjdf;laksjdf as;dkfjsd-->"},
{"type": "text/html", "input": "<!"},
{"type": "text/html", "input": "\t"},
{"type": "text/html", "input": "<!>"},
{"type": "text/html", "input": "<?"},
{"type": "text/html", "input": "<??>"},
{"type": "application/rss+xml", "input": "<rss"},
{"type": "application/atom+xml", "input": "<feed"},
{"type": "text/html", "input": "<html"},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n<html><head>\n<title>302 Found</title>\n</head><body>\n<h1>Found</h1>\n<p>The document has moved <a href=\"http://feeds.feedburner.com/gofug\">here</a>.</p>\n</body></html>\n"},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\r\n<HTML><HEAD>\r\n <link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/289619328/feed.css\" /><link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/431602649/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/382549546/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/314618017/feed.css\" /><META http-equiv=\"expires\" content="},
{"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n<html>\r\n<head>\r\n<title>Xiaxue - Chicken pie blogger.</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"><style type=\"text/css\">\r\n<style type=\"text/css\">\r\n<!--\r\nbody {\r\n background-color: #FFF2F2;\r\n}\r\n.style1 {font-family: Georgia, \"Times New Roman\", Times, serif}\r\n.style2 {\r\n color: #8a567c;\r\n font-size: 14px;\r\n font-family: Georgia, \"Times New Roman\", Times, serif;\r\n}\r"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head> \r\n<title>Google Operating System</title>\r\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"Description\" content=\"Unofficial news and tips about Google. A blog that watches Google's latest developments and the attempts to move your operating system online.\" />\r\n<meta name=\"generator\" c"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n <title>Assimilated Press</title> <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Assimilated Press - Atom\" href=\"http://assimila"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n <title>PostSecret</title>\r\n<META name=\"keywords\" Content=\"secrets, postcard, secret, postcards, postsecret, postsecrets,online confessional, post secret, post secrets, artomatic, post a secret\"><META name=\"discription\" Content=\"See a Secret...Share a Secret\"> <meta http-equiv=\"Content-Type\" content=\"te"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns='http://www.w3.org/1999/xhtml' xmlns:b='http://www.google.com/2005/gml/b' xmlns:data='http://www.google.com/2005/gml/data' xmlns:expr='http://www.google.com/2005/gml/expr'>\n <head>\n \n <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>\n <meta content='true' name='MSSmartTagsPreventParsing'/>\n <meta content='blogger' name='generator'/>\n <link rel=\"alternate\" typ"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\">\n<head profile=\"http://gmpg.org/xfn/11\"> \n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /> \n<title> CMS Lever</title><link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"http://s.wordpress.com/wp-content/themes/pub/twenty-eight/2813.css\"/>\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" h"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> Park Avenue Peerage</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://parkavenuepeerage.wordpress.com/feed/\" />\t<link rel=\"pingback\" href="},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> \u884c\u96f2\u6d41\u6c34 -like a floating clouds and running water-</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://shw4.wordpress.com/feed/\" />\t<li"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Go Fug Yourself</title><link rel=\"stylesheet\" href=\"http://gofugyourself.typepad.com/go_fug_yourself/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Atom\" "},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head profile=\"http://gmpg.org/xfn/11\">\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /><title> Ladies&#8230;</title><meta name=\"generator\" content=\"WordPress.com\" /> <!-- leave this for stats --><link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/default/style.css?1\" type=\"tex"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n <title>The Sartorialist</title> <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"The Sartorialist - Atom\" href=\"http://thesartorialist.blogspot"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Creating Passionate Users</title><link rel=\"stylesheet\" href=\"http://headrush.typepad.com/creating_passionate_users/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n <meta name=\"keywords\" content=\"marketing, blog, seth, ideas, respect, permission\" />\n <meta name=\"description\" content=\"Seth Godin's riffs on marketing, respect, and the "},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n \n <meta name=\"description\" content=\" Western Civilization hangs in the balance. This blog is part of the solution,the cure. Get your heads out of the sand and Fight the G"},
{"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\" />\n<title> From Under the Rotunda</title>\n<link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/pub/andreas04/style.css\" type=\"text/css\""},
{"type": "application/atom+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href=\"http://www.blogger.com/styles/atom.css\" type=\"text/css\"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'><id>tag:blogger.com,1999:blog-10861780</id><updated>2007-07-27T12:38:50.888-07:00</updated><title type='text'>Official Google Blog</title><link rel='alternate' type='text/html' href='http://googleblog.blogspot.com/'/><link rel='next' type='application/atom+xml' href='http://googleblog.blogs"},
{"type": "application/rss+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><rss xmlns:atom='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' version='2.0'><channel><atom:id>tag:blogger.com,1999:blog-10861780</atom:id><lastBuildDate>Fri, 27 Jul 2007 19:38:50 +0000</lastBuildDate><title>Official Google Blog</title><description/><link>http://googleblog.blogspot.com/</link><managingEditor>Eric Case</managingEditor><generator>Blogger</generator><openSearch:totalResults>729</openSearch:totalResults><openSearc"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>From Under the Rotunda</title>\n\t<link>http://dannybernardi.wordpress.com</link>\n\t<description>The Monographs of Danny Ber"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>CMS Lever</title>\n\t<link>http://kanaguri.wordpress.com</link>\n\t<description>CMS\u306e\u6c17\u306b\u306a\u3063\u305f\u3053\u3068</description>\n\t<pubDate>Wed, 18 Jul 2007 21:26:22 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>ja</languag"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\">\n <title>Atlas Shrugs</title>\n <link rel=\"self\" type=\"application/atom+xml\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/atom.xml\" />\n <link rel=\"alternate\" type=\"text/html\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/\" />\n <id>tag:typepad.com,2003:weblog-132946</id>\n <updated>2007-08-15T16:07:34-04"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n <title>Creating Passionate Users</title>\r\n "},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n <title>Seth's Blog</title>\r\n <link rel=\"alternate\" type=\"text/html\" href=\"http://sethgodin.typepad.com/seths_blog/\" />\r\n <link rel=\"s"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:openSearch=\"http://a9.com/-/spec/opensearchrss/1.0/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\"><id>tag:blogger.com,1999:blog-32454861</id><updated>2007-07-31T21:44:09.867+02:00</upd"},
{"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atomfull.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://purl.org/atom/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"0.3\">\r\n <title>Go Fug Yourself</title>\r\n <link rel=\"alternate\" type=\"text/html\" href=\"http://go"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/rss2full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><rss xmlns:creativeCommons=\"http://backend.userland.com/creativeCommonsRssModule\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"2.0\"><channel><title>Google Operating System</title><link>http://googlesystem.blogspot.com/</link>"},
{"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>Nunublog</title>\n\t<link>http://nunubh.wordpress.com</link>\n\t<description>Just Newbie Blog!</description>\n\t<pubDate>Mon, 09 Jul 2007 18:54:09 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>id</language>\n\t\t\t<item>\n\t\t<ti"},
{"type": "text/html", "input": "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<HEAD>\r\n<TITLE>Design*Sponge</TITLE><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Design*Sponge - Atom\" href=\"http://designsponge.blogspot.com/feeds/posts/default\" />\r\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"Design*Sponge - RSS\" href="},
{"type": "text/html", "input": "<HTML>\n<HEAD>\n<TITLE>Moved Temporarily</TITLE>\n</HEAD>\n<BODY BGCOLOR=\"#FFFFFF\" TEXT=\"#000000\">\n<H1>Moved Temporarily</H1>\nThe document has moved <A HREF=\"http://feeds.feedburner.com/thesecretdiaryofstevejobs\">here</A>.\n</BODY>\n</HTML>\n"}
]

View File

@@ -1,75 +0,0 @@
{"tests": [
{"description":"PLAINTEXT content model flag",
"initialStates":["PLAINTEXT state"],
"lastStartTag":"plaintext",
"input":"<head>&body;",
"output":[["Character", "<head>&body;"]]},
{"description":"End tag closing RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp>",
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xMp>",
"output":[["Character", "foo"], ["EndTag", "xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp ",
"output":[["Character", "foo"], "ParseError"]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp",
"output":[["Character", "foo</xmp"]]},
{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp/",
"output":[["Character", "foo"], "ParseError"]},
{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp<",
"output":[["Character", "foo</xmp<"]]},
{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</foo>bar</xmp>",
"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</foo>bar</xmpaar>",
"output":[["Character", "</foo>bar</xmpaar>"]]},
{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo</xmp></baz>",
"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
{"description":"RAWTEXT w/ something looking like an entity",
"initialStates":["RAWTEXT state"],
"lastStartTag":"xmp",
"input":"&foo;",
"output":[["Character", "&foo;"]]},
{"description":"RCDATA w/ an entity",
"initialStates":["RCDATA state"],
"lastStartTag":"textarea",
"input":"&lt;",
"output":[["Character", "<"]]}
]}

View File

@@ -1,90 +0,0 @@
{
"tests": [
{
"description":"CR in bogus comment state",
"input":"<?\u000d",
"output":["ParseError", ["Comment", "?\u000a"]]
},
{
"description":"CRLF in bogus comment state",
"input":"<?\u000d\u000a",
"output":["ParseError", ["Comment", "?\u000a"]]
},
{
"description":"NUL in RCDATA and RAWTEXT",
"doubleEscaped":true,
"initialStates":["RCDATA state", "RAWTEXT state"],
"input":"\\u0000",
"output":["ParseError", ["Character", "\\uFFFD"]]
},
{
"description":"skip first BOM but not later ones",
"input":"\uFEFFfoo\uFEFFbar",
"output":[["Character", "foo\uFEFFbar"]]
},
{
"description":"Non BMP-charref in in RCDATA",
"initialStates":["RCDATA state"],
"input":"&NotEqualTilde;",
"output":[["Character", "\u2242\u0338"]]
},
{
"description":"Bad charref in in RCDATA",
"initialStates":["RCDATA state"],
"input":"&NotEqualTild;",
"output":["ParseError", ["Character", "&NotEqualTild;"]]
},
{
"description":"lowercase endtags in RCDATA and RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</XMP>",
"output":[["EndTag","xmp"]]
},
{
"description":"bad endtag in RCDATA and RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</ XMP>",
"output":[["Character","</ XMP>"]]
},
{
"description":"bad endtag in RCDATA and RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</xm>",
"output":[["Character","</xm>"]]
},
{
"description":"bad endtag in RCDATA and RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</xm ",
"output":[["Character","</xm "]]
},
{
"description":"bad endtag in RCDATA and RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"</xm/",
"output":[["Character","</xm/"]]
},
{
"description":"Non BMP-charref in attribute",
"input":"<p id=\"&NotEqualTilde;\">",
"output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
},
{
"description":"--!NUL in comment ",
"doubleEscaped":true,
"input":"<!----!\\u0000-->",
"output":["ParseError", ["Comment", "--!\\uFFFD"]]
},
{
"description":"space EOF after doctype ",
"input":"<!DOCTYPE html ",
"output":["ParseError", ["DOCTYPE", "html", null, null , false]]
}
]
}

View File

@@ -1,283 +0,0 @@
{"tests": [
{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
"input":"<h a='&noti;'>",
"output": ["ParseError", ["StartTag", "h", {"a": "&noti;"}]]},
{"description": "Entity name followed by the equals sign in an attribute value.",
"input":"<h a='&lang='>",
"output": ["ParseError", ["StartTag", "h", {"a": "&lang="}]]},
{"description": "CR as numeric entity",
"input":"&#013;",
"output": ["ParseError", ["Character", "\r"]]},
{"description": "CR as hexadecimal numeric entity",
"input":"&#x00D;",
"output": ["ParseError", ["Character", "\r"]]},
{"description": "Windows-1252 EURO SIGN numeric entity.",
"input":"&#0128;",
"output": ["ParseError", ["Character", "\u20AC"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0129;",
"output": ["ParseError", ["Character", "\u0081"]]},
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
"input":"&#0130;",
"output": ["ParseError", ["Character", "\u201A"]]},
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
"input":"&#0131;",
"output": ["ParseError", ["Character", "\u0192"]]},
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
"input":"&#0132;",
"output": ["ParseError", ["Character", "\u201E"]]},
{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
"input":"&#0133;",
"output": ["ParseError", ["Character", "\u2026"]]},
{"description": "Windows-1252 DAGGER numeric entity.",
"input":"&#0134;",
"output": ["ParseError", ["Character", "\u2020"]]},
{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
"input":"&#0135;",
"output": ["ParseError", ["Character", "\u2021"]]},
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
"input":"&#0136;",
"output": ["ParseError", ["Character", "\u02C6"]]},
{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
"input":"&#0137;",
"output": ["ParseError", ["Character", "\u2030"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
"input":"&#0138;",
"output": ["ParseError", ["Character", "\u0160"]]},
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
"input":"&#0139;",
"output": ["ParseError", ["Character", "\u2039"]]},
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
"input":"&#0140;",
"output": ["ParseError", ["Character", "\u0152"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0141;",
"output": ["ParseError", ["Character", "\u008D"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
"input":"&#0142;",
"output": ["ParseError", ["Character", "\u017D"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0143;",
"output": ["ParseError", ["Character", "\u008F"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0144;",
"output": ["ParseError", ["Character", "\u0090"]]},
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
"input":"&#0145;",
"output": ["ParseError", ["Character", "\u2018"]]},
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
"input":"&#0146;",
"output": ["ParseError", ["Character", "\u2019"]]},
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
"input":"&#0147;",
"output": ["ParseError", ["Character", "\u201C"]]},
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
"input":"&#0148;",
"output": ["ParseError", ["Character", "\u201D"]]},
{"description": "Windows-1252 BULLET numeric entity.",
"input":"&#0149;",
"output": ["ParseError", ["Character", "\u2022"]]},
{"description": "Windows-1252 EN DASH numeric entity.",
"input":"&#0150;",
"output": ["ParseError", ["Character", "\u2013"]]},
{"description": "Windows-1252 EM DASH numeric entity.",
"input":"&#0151;",
"output": ["ParseError", ["Character", "\u2014"]]},
{"description": "Windows-1252 SMALL TILDE numeric entity.",
"input":"&#0152;",
"output": ["ParseError", ["Character", "\u02DC"]]},
{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
"input":"&#0153;",
"output": ["ParseError", ["Character", "\u2122"]]},
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
"input":"&#0154;",
"output": ["ParseError", ["Character", "\u0161"]]},
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
"input":"&#0155;",
"output": ["ParseError", ["Character", "\u203A"]]},
{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
"input":"&#0156;",
"output": ["ParseError", ["Character", "\u0153"]]},
{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
"input":"&#0157;",
"output": ["ParseError", ["Character", "\u009D"]]},
{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
"input":"&#x080;",
"output": ["ParseError", ["Character", "\u20AC"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x081;",
"output": ["ParseError", ["Character", "\u0081"]]},
{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x082;",
"output": ["ParseError", ["Character", "\u201A"]]},
{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
"input":"&#x083;",
"output": ["ParseError", ["Character", "\u0192"]]},
{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x084;",
"output": ["ParseError", ["Character", "\u201E"]]},
{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
"input":"&#x085;",
"output": ["ParseError", ["Character", "\u2026"]]},
{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
"input":"&#x086;",
"output": ["ParseError", ["Character", "\u2020"]]},
{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
"input":"&#x087;",
"output": ["ParseError", ["Character", "\u2021"]]},
{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
"input":"&#x088;",
"output": ["ParseError", ["Character", "\u02C6"]]},
{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
"input":"&#x089;",
"output": ["ParseError", ["Character", "\u2030"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
"input":"&#x08A;",
"output": ["ParseError", ["Character", "\u0160"]]},
{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x08B;",
"output": ["ParseError", ["Character", "\u2039"]]},
{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
"input":"&#x08C;",
"output": ["ParseError", ["Character", "\u0152"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08D;",
"output": ["ParseError", ["Character", "\u008D"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x08E;",
"output": ["ParseError", ["Character", "\u017D"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x08F;",
"output": ["ParseError", ["Character", "\u008F"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x090;",
"output": ["ParseError", ["Character", "\u0090"]]},
{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x091;",
"output": ["ParseError", ["Character", "\u2018"]]},
{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x092;",
"output": ["ParseError", ["Character", "\u2019"]]},
{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x093;",
"output": ["ParseError", ["Character", "\u201C"]]},
{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x094;",
"output": ["ParseError", ["Character", "\u201D"]]},
{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
"input":"&#x095;",
"output": ["ParseError", ["Character", "\u2022"]]},
{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
"input":"&#x096;",
"output": ["ParseError", ["Character", "\u2013"]]},
{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
"input":"&#x097;",
"output": ["ParseError", ["Character", "\u2014"]]},
{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
"input":"&#x098;",
"output": ["ParseError", ["Character", "\u02DC"]]},
{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
"input":"&#x099;",
"output": ["ParseError", ["Character", "\u2122"]]},
{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
"input":"&#x09A;",
"output": ["ParseError", ["Character", "\u0161"]]},
{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
"input":"&#x09B;",
"output": ["ParseError", ["Character", "\u203A"]]},
{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
"input":"&#x09C;",
"output": ["ParseError", ["Character", "\u0153"]]},
{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
"input":"&#x09D;",
"output": ["ParseError", ["Character", "\u009D"]]},
{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
"input":"&#x09E;",
"output": ["ParseError", ["Character", "\u017E"]]},
{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
"input":"&#x09F;",
"output": ["ParseError", ["Character", "\u0178"]]},
{"description": "Decimal numeric entity followed by hex character a.",
"input":"&#97a",
"output": ["ParseError", ["Character", "aa"]]},
{"description": "Decimal numeric entity followed by hex character A.",
"input":"&#97A",
"output": ["ParseError", ["Character", "aA"]]},
{"description": "Decimal numeric entity followed by hex character f.",
"input":"&#97f",
"output": ["ParseError", ["Character", "af"]]},
{"description": "Decimal numeric entity followed by hex character A.",
"input":"&#97F",
"output": ["ParseError", ["Character", "aF"]]}
]}

View File

@@ -1,33 +0,0 @@
{"tests": [
{"description":"Commented close tag in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!--</xmp>--></xmp>",
"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},
{"description":"Bogus comment in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!-->baz</xmp>",
"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},
{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!--></xmp><!-->baz</xmp>",
"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]},
{"description":"Commented entities in RCDATA",
"initialStates":["RCDATA state"],
"lastStartTag":"xmp",
"input":" &amp; <!-- &amp; --> &amp; </xmp>",
"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},
{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
"initialStates":["RCDATA state", "RAWTEXT state"],
"lastStartTag":"xmp",
"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}
]}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +0,0 @@
{"tests": [
{"description":"<!---- >",
"input":"<!---- >",
"output":["ParseError", "ParseError", ["Comment","-- >"]]}
]}

View File

@@ -1,196 +0,0 @@
{"tests": [
{"description":"Correct Doctype lowercase",
"input":"<!DOCTYPE html>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype uppercase",
"input":"<!DOCTYPE HTML>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype mixed case",
"input":"<!DOCTYPE HtMl>",
"output":[["DOCTYPE", "html", null, null, true]]},
{"description":"Correct Doctype case with EOF",
"input":"<!DOCTYPE HtMl",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Truncated doctype start",
"input":"<!DOC>",
"output":["ParseError", ["Comment", "DOC"]]},
{"description":"Doctype in error",
"input":"<!DOCTYPE foo>",
"output":[["DOCTYPE", "foo", null, null, true]]},
{"description":"Single Start Tag",
"input":"<h>",
"output":[["StartTag", "h", {}]]},
{"description":"Empty end tag",
"input":"</>",
"output":["ParseError"]},
{"description":"Empty start tag",
"input":"<>",
"output":["ParseError", ["Character", "<>"]]},
{"description":"Start Tag w/attribute",
"input":"<h a='b'>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start Tag w/attribute no quotes",
"input":"<h a=b>",
"output":[["StartTag", "h", {"a":"b"}]]},
{"description":"Start/End Tag",
"input":"<h></h>",
"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
{"description":"Two unclosed start tags",
"input":"<p>One<p>Two",
"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
{"description":"End Tag w/attribute",
"input":"<h></h a='b'>",
"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
{"description":"Multiple atts",
"input":"<h a='b' c='d'>",
"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Multiple atts no space",
"input":"<h a='b'c='d'>",
"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]},
{"description":"Repeated attr",
"input":"<h a='b' a='d'>",
"output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
{"description":"Simple comment",
"input":"<!--comment-->",
"output":[["Comment", "comment"]]},
{"description":"Comment, Central dash no space",
"input":"<!----->",
"output":["ParseError", ["Comment", "-"]]},
{"description":"Comment, two central dashes",
"input":"<!-- --comment -->",
"output":["ParseError", ["Comment", " --comment "]]},
{"description":"Unfinished comment",
"input":"<!--comment",
"output":["ParseError", ["Comment", "comment"]]},
{"description":"Start of a comment",
"input":"<!-",
"output":["ParseError", ["Comment", "-"]]},
{"description":"Short comment",
"input":"<!-->",
"output":["ParseError", ["Comment", ""]]},
{"description":"Short comment two",
"input":"<!--->",
"output":["ParseError", ["Comment", ""]]},
{"description":"Short comment three",
"input":"<!---->",
"output":[["Comment", ""]]},
{"description":"Ampersand EOF",
"input":"&",
"output":[["Character", "&"]]},
{"description":"Ampersand ampersand EOF",
"input":"&&",
"output":[["Character", "&&"]]},
{"description":"Ampersand space EOF",
"input":"& ",
"output":[["Character", "& "]]},
{"description":"Unfinished entity",
"input":"&f",
"output":["ParseError", ["Character", "&f"]]},
{"description":"Ampersand, number sign",
"input":"&#",
"output":["ParseError", ["Character", "&#"]]},
{"description":"Unfinished numeric entity",
"input":"&#x",
"output":["ParseError", ["Character", "&#x"]]},
{"description":"Entity with trailing semicolon (1)",
"input":"I'm &not;it",
"output":[["Character","I'm \u00ACit"]]},
{"description":"Entity with trailing semicolon (2)",
"input":"I'm &notin;",
"output":[["Character","I'm \u2209"]]},
{"description":"Entity without trailing semicolon (1)",
"input":"I'm &notit",
"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]},
{"description":"Entity without trailing semicolon (2)",
"input":"I'm &notin",
"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]},
{"description":"Partial entity match at end of file",
"input":"I'm &no",
"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
{"description":"Non-ASCII character reference name",
"input":"&\u00AC;",
"output":["ParseError", ["Character", "&\u00AC;"]]},
{"description":"ASCII decimal entity",
"input":"&#0036;",
"output":[["Character","$"]]},
{"description":"ASCII hexadecimal entity",
"input":"&#x3f;",
"output":[["Character","?"]]},
{"description":"Hexadecimal entity in attribute",
"input":"<h a='&#x3f;'></h>",
"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
{"description":"Entity in attribute without semicolon ending in x",
"input":"<h a='&notx'>",
"output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},
{"description":"Entity in attribute without semicolon ending in 1",
"input":"<h a='&not1'>",
"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
{"description":"Entity in attribute without semicolon ending in i",
"input":"<h a='&noti'>",
"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
{"description":"Entity in attribute without semicolon",
"input":"<h a='&COPY'>",
"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]},
{"description":"Unquoted attribute ending in ampersand",
"input":"<s o=& t>",
"output":[["StartTag","s",{"o":"&","t":""}]]},
{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
"input":"<a a=a&>foo",
"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
{"description":"plaintext element",
"input":"<plaintext>foobar",
"output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
{"description":"Open angled bracket in unquoted attribute value state",
"input":"<a a=f<>",
"output":["ParseError", ["StartTag", "a", {"a":"f<"}]]}
]}

View File

@@ -1,179 +0,0 @@
{"tests": [
{"description":"DOCTYPE without name",
"input":"<!DOCTYPE>",
"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
{"description":"DOCTYPE without space before name",
"input":"<!DOCTYPEhtml>",
"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
{"description":"Incorrect DOCTYPE without a space before name",
"input":"<!DOCTYPEfoo>",
"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
{"description":"DOCTYPE with publicId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
{"description":"DOCTYPE with EOF after PUBLIC",
"input":"<!DOCTYPE html PUBLIC",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"DOCTYPE with EOF after PUBLIC '",
"input":"<!DOCTYPE html PUBLIC '",
"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
{"description":"DOCTYPE with EOF after PUBLIC 'x",
"input":"<!DOCTYPE html PUBLIC 'x",
"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
{"description":"DOCTYPE with systemId",
"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with publicId and systemId",
"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
{"description":"DOCTYPE with > in double-quoted publicId",
"input":"<!DOCTYPE html PUBLIC \">x",
"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
{"description":"DOCTYPE with > in single-quoted publicId",
"input":"<!DOCTYPE html PUBLIC '>x",
"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
{"description":"DOCTYPE with > in double-quoted systemId",
"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
{"description":"DOCTYPE with > in single-quoted systemId",
"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
{"description":"Incomplete doctype",
"input":"<!DOCTYPE html ",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Numeric entity representing the NUL character",
"input":"&#0000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing the NUL character",
"input":"&#x0000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#2225222;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
"input":"&#x1010FFFF;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity pair representing a surrogate pair",
"input":"&#xD869;&#xDED6;",
"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
{"description":"Hexadecimal entity with mixed uppercase and lowercase",
"input":"&#xaBcD;",
"output":[["Character", "\uABCD"]]},
{"description":"Entity without a name",
"input":"&;",
"output":["ParseError", ["Character", "&;"]]},
{"description":"Unescaped ampersand in attribute value",
"input":"<h a='&'>",
"output":[["StartTag", "h", { "a":"&" }]]},
{"description":"StartTag containing <",
"input":"<a<b>",
"output":[["StartTag", "a<b", { }]]},
{"description":"Non-void element containing trailing /",
"input":"<h/>",
"output":[["StartTag","h",{},true]]},
{"description":"Void element with permitted slash",
"input":"<br/>",
"output":[["StartTag","br",{},true]]},
{"description":"Void element with permitted slash (with attribute)",
"input":"<br foo='bar'/>",
"output":[["StartTag","br",{"foo":"bar"},true]]},
{"description":"StartTag containing /",
"input":"<h/a='b'>",
"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
{"description":"Double-quoted attribute value",
"input":"<h a=\"b\">",
"output":[["StartTag", "h", { "a":"b" }]]},
{"description":"Unescaped </",
"input":"</",
"output":["ParseError", ["Character", "</"]]},
{"description":"Illegal end tag name",
"input":"</1>",
"output":["ParseError", ["Comment", "1"]]},
{"description":"Simili processing instruction",
"input":"<?namespace>",
"output":["ParseError", ["Comment", "?namespace"]]},
{"description":"A bogus comment stops at >, even if preceeded by two dashes",
"input":"<?foo-->",
"output":["ParseError", ["Comment", "?foo--"]]},
{"description":"Unescaped <",
"input":"foo < bar",
"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
{"description":"Null Byte Replacement",
"input":"\u0000",
"output":["ParseError", ["Character", "\u0000"]]},
{"description":"Comment with dash",
"input":"<!---x",
"output":["ParseError", ["Comment", "-x"]]},
{"description":"Entity + newline",
"input":"\nx\n&gt;\n",
"output":[["Character","\nx\n>\n"]]},
{"description":"Start tag with no attributes but space before the greater-than sign",
"input":"<h >",
"output":[["StartTag", "h", {}]]},
{"description":"Empty attribute followed by uppercase attribute",
"input":"<h a B=''>",
"output":[["StartTag", "h", {"a":"", "b":""}]]},
{"description":"Double-quote after attribute name",
"input":"<h a \">",
"output":["ParseError", ["StartTag", "h", {"a":"", "\"":""}]]},
{"description":"Single-quote after attribute name",
"input":"<h a '>",
"output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]},
{"description":"Empty end tag with following characters",
"input":"a</>bc",
"output":[["Character", "a"], "ParseError", ["Character", "bc"]]},
{"description":"Empty end tag with following tag",
"input":"a</><b>c",
"output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]]},
{"description":"Empty end tag with following comment",
"input":"a</><!--b-->c",
"output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]},
{"description":"Empty end tag with following end tag",
"input":"a</></b>c",
"output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]}
]}

File diff suppressed because it is too large Load Diff

View File

@@ -1,344 +0,0 @@
{"tests": [
{"description":"< in attribute name",
"input":"<z/0 <>",
"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
{"description":"< in attribute value",
"input":"<z x=<>",
"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
{"description":"= in unquoted attribute value",
"input":"<z z=z=z>",
"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
{"description":"= attribute",
"input":"<z =>",
"output":["ParseError", ["StartTag", "z", {"=": ""}]]},
{"description":"== attribute",
"input":"<z ==>",
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": ""}]]},
{"description":"=== attribute",
"input":"<z ===>",
"output":["ParseError", "ParseError", ["StartTag", "z", {"=": "="}]]},
{"description":"==== attribute",
"input":"<z ====>",
"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},
{"description":"Allowed \" after ampersand in attribute value",
"input":"<z z=\"&\">",
"output":[["StartTag", "z", {"z": "&"}]]},
{"description":"Non-allowed ' after ampersand in attribute value",
"input":"<z z=\"&'\">",
"output":["ParseError", ["StartTag", "z", {"z": "&'"}]]},
{"description":"Allowed ' after ampersand in attribute value",
"input":"<z z='&'>",
"output":[["StartTag", "z", {"z": "&"}]]},
{"description":"Non-allowed \" after ampersand in attribute value",
"input":"<z z='&\"'>",
"output":["ParseError", ["StartTag", "z", {"z": "&\""}]]},
{"description":"Text after bogus character reference",
"input":"<z z='&xlink_xmlns;'>bar<z>",
"output":["ParseError",["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
{"description":"Text after hex character reference",
"input":"<z z='&#x0020; foo'>bar<z>",
"output":[["StartTag","z",{"z":" foo"}],["Character","bar"],["StartTag","z",{}]]},
{"description":"Attribute name starting with \"",
"input":"<foo \"='bar'>",
"output":["ParseError", ["StartTag", "foo", {"\"": "bar"}]]},
{"description":"Attribute name starting with '",
"input":"<foo '='bar'>",
"output":["ParseError", ["StartTag", "foo", {"'": "bar"}]]},
{"description":"Attribute name containing \"",
"input":"<foo a\"b='bar'>",
"output":["ParseError", ["StartTag", "foo", {"a\"b": "bar"}]]},
{"description":"Attribute name containing '",
"input":"<foo a'b='bar'>",
"output":["ParseError", ["StartTag", "foo", {"a'b": "bar"}]]},
{"description":"Unquoted attribute value containing '",
"input":"<foo a=b'c>",
"output":["ParseError", ["StartTag", "foo", {"a": "b'c"}]]},
{"description":"Unquoted attribute value containing \"",
"input":"<foo a=b\"c>",
"output":["ParseError", ["StartTag", "foo", {"a": "b\"c"}]]},
{"description":"Double-quoted attribute value not followed by whitespace",
"input":"<foo a=\"b\"c>",
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
{"description":"Single-quoted attribute value not followed by whitespace",
"input":"<foo a='b'c>",
"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
{"description":"Quoted attribute followed by permitted /",
"input":"<br a='b'/>",
"output":[["StartTag","br",{"a":"b"},true]]},
{"description":"Quoted attribute followed by non-permitted /",
"input":"<bar a='b'/>",
"output":[["StartTag","bar",{"a":"b"},true]]},
{"description":"CR EOF after doctype name",
"input":"<!doctype html \r",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"CR EOF in tag name",
"input":"<z\r",
"output":["ParseError"]},
{"description":"Slash EOF in tag name",
"input":"<z/",
"output":["ParseError"]},
{"description":"Zero hex numeric entity",
"input":"&#x0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
{"description":"Zero decimal numeric entity",
"input":"&#0",
"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
{"description":"Zero-prefixed hex numeric entity",
"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
"output":[["Character", "A"]]},
{"description":"Zero-prefixed decimal numeric entity",
"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
"output":[["Character", "A"]]},
{"description":"Empty hex numeric entities",
"input":"&#x &#X ",
"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
{"description":"Empty decimal numeric entities",
"input":"&# &#; ",
"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
{"description":"Non-BMP numeric entity",
"input":"&#x10000;",
"output":[["Character", "\uD800\uDC00"]]},
{"description":"Maximum non-BMP numeric entity",
"input":"&#X10FFFF;",
"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},
{"description":"Above maximum numeric entity",
"input":"&#x110000;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"32-bit hex numeric entity",
"input":"&#x80000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"33-bit hex numeric entity",
"input":"&#x100000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"33-bit decimal numeric entity",
"input":"&#4294967361;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"65-bit hex numeric entity",
"input":"&#x10000000000000041;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"65-bit decimal numeric entity",
"input":"&#18446744073709551681;",
"output":["ParseError", ["Character", "\uFFFD"]]},
{"description":"Surrogate code point edge cases",
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
{"description":"Uppercase start tag name",
"input":"<X>",
"output":[["StartTag", "x", {}]]},
{"description":"Uppercase end tag name",
"input":"</X>",
"output":[["EndTag", "x"]]},
{"description":"Uppercase attribute name",
"input":"<x X>",
"output":[["StartTag", "x", { "x":"" }]]},
{"description":"Tag/attribute name case edge values",
"input":"<x@AZ[`az{ @AZ[`az{>",
"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
{"description":"Duplicate different-case attributes",
"input":"<x x=1 x=2 X=3>",
"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
{"description":"Uppercase close tag attributes",
"input":"</x X>",
"output":["ParseError", ["EndTag", "x"]]},
{"description":"Duplicate close tag attributes",
"input":"</x x x>",
"output":["ParseError", "ParseError", ["EndTag", "x"]]},
{"description":"Permitted slash",
"input":"<br/>",
"output":[["StartTag","br",{},true]]},
{"description":"Non-permitted slash",
"input":"<xr/>",
"output":[["StartTag","xr",{},true]]},
{"description":"Permitted slash but in close tag",
"input":"</br/>",
"output":["ParseError", ["EndTag", "br"]]},
{"description":"Doctype public case-sensitivity (1)",
"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
{"description":"Doctype public case-sensitivity (2)",
"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
{"description":"Doctype system case-sensitivity (1)",
"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
"output":[["DOCTYPE", "html", null, "XyZ", true]]},
{"description":"Doctype system case-sensitivity (2)",
"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
"output":[["DOCTYPE", "html", null, "xYz", true]]},
{"description":"U+0000 in lookahead region after non-matching character",
"input":"<!doc>\u0000",
"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
"ignoreErrorOrder":true},
{"description":"U+0000 in lookahead region",
"input":"<!doc\u0000",
"output":["ParseError", ["Comment", "doc\uFFFD"]],
"ignoreErrorOrder":true},
{"description":"U+0080 in lookahead region",
"input":"<!doc\u0080",
"output":["ParseError", "ParseError", ["Comment", "doc\u0080"]],
"ignoreErrorOrder":true},
{"description":"U+FDD1 in lookahead region",
"input":"<!doc\uFDD1",
"output":["ParseError", "ParseError", ["Comment", "doc\uFDD1"]],
"ignoreErrorOrder":true},
{"description":"U+1FFFF in lookahead region",
"input":"<!doc\uD83F\uDFFF",
"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
"ignoreErrorOrder":true},
{"description":"CR followed by non-LF",
"input":"\r?",
"output":[["Character", "\n?"]]},
{"description":"CR at EOF",
"input":"\r",
"output":[["Character", "\n"]]},
{"description":"LF at EOF",
"input":"\n",
"output":[["Character", "\n"]]},
{"description":"CR LF",
"input":"\r\n",
"output":[["Character", "\n"]]},
{"description":"CR CR",
"input":"\r\r",
"output":[["Character", "\n\n"]]},
{"description":"LF LF",
"input":"\n\n",
"output":[["Character", "\n\n"]]},
{"description":"LF CR",
"input":"\n\r",
"output":[["Character", "\n\n"]]},
{"description":"text CR CR CR text",
"input":"text\r\r\rtext",
"output":[["Character", "text\n\n\ntext"]]},
{"description":"Doctype publik",
"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype publi",
"input":"<!DOCTYPE html PUBLI",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype sistem",
"input":"<!DOCTYPE html SISTEM \"AbC\">",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype sys",
"input":"<!DOCTYPE html SYS",
"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
{"description":"Doctype html x>text",
"input":"<!DOCTYPE html x>text",
"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
{"description":"Grave accent in unquoted attribute",
"input":"<a a=aa`>",
"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
{"description":"EOF in tag name state ",
"input":"<a",
"output":["ParseError"]},
{"description":"EOF in tag name state",
"input":"<a",
"output":["ParseError"]},
{"description":"EOF in before attribute name state",
"input":"<a ",
"output":["ParseError"]},
{"description":"EOF in attribute name state",
"input":"<a a",
"output":["ParseError"]},
{"description":"EOF in after attribute name state",
"input":"<a a ",
"output":["ParseError"]},
{"description":"EOF in before attribute value state",
"input":"<a a =",
"output":["ParseError"]},
{"description":"EOF in attribute value (double quoted) state",
"input":"<a a =\"a",
"output":["ParseError"]},
{"description":"EOF in attribute value (single quoted) state",
"input":"<a a ='a",
"output":["ParseError"]},
{"description":"EOF in attribute value (unquoted) state",
"input":"<a a =a",
"output":["ParseError"]},
{"description":"EOF in after attribute value state",
"input":"<a a ='a'",
"output":["ParseError"]}
]}

File diff suppressed because it is too large Load Diff

View File

@@ -1,27 +0,0 @@
{"tests" : [
{"description": "Invalid Unicode character U+DFFF",
"doubleEscaped":true,
"input": "\\uDFFF",
"output":["ParseError", ["Character", "\\uFFFD"]]},
{"description": "Invalid Unicode character U+D800",
"doubleEscaped":true,
"input": "\\uD800",
"output":["ParseError", ["Character", "\\uFFFD"]]},
{"description": "Invalid Unicode character U+DFFF with valid preceding character",
"doubleEscaped":true,
"input": "a\\uDFFF",
"output":["ParseError", ["Character", "a\\uFFFD"]]},
{"description": "Invalid Unicode character U+D800 with valid following character",
"doubleEscaped":true,
"input": "\\uD800a",
"output":["ParseError", ["Character", "\\uFFFDa"]]},
{"description":"CR followed by U+0000",
"input":"\r\u0000",
"output":[["Character", "\n"], "ParseError", ["Character", "\u0000"]],
"ignoreErrorOrder":true}
]
}

View File

@@ -1,22 +0,0 @@
{"xmlViolationTests": [
{"description":"Non-XML character",
"input":"a\uFFFFb",
"ignoreErrorOrder":true,
"output":["ParseError",["Character","a\uFFFDb"]]},
{"description":"Non-XML space",
"input":"a\u000Cb",
"ignoreErrorOrder":true,
"output":[["Character","a b"]]},
{"description":"Double hyphen in comment",
"input":"<!-- foo -- bar -->",
"output":["ParseError",["Comment"," foo - - bar "]]},
{"description":"FF between attributes",
"input":"<a b=''\u000Cc=''>",
"output":[["StartTag","a",{"b":"","c":""}]]}
]}

View File

@@ -1,194 +0,0 @@
#data
<a><p></a></p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <p>
| <a>
#data
<a>1<p>2</a>3</p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
#data
<a>1<button>2</a>3</button>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <button>
| <a>
| "2"
| "3"
#data
<a>1<b>2</a>3</b>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <b>
| "2"
| <b>
| "3"
#data
<a>1<div>2<div>3</a>4</div>5</div>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <div>
| <a>
| "2"
| <div>
| <a>
| "3"
| "4"
| "5"
#data
<table><a>1<p>2</a>3</p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
| <table>
#data
<b><b><a><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| <a>
| <p>
| <a>
#data
<b><a><b><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <a>
| <b>
| <b>
| <p>
| <a>
#data
<a><b><b><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <b>
| <b>
| <p>
| <a>
#data
<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| "1"
| <s>
| id="A"
| "2"
| <b>
| id="B"
| "3"
| <s>
| id="A"
| <b>
| id="B"
| "4"
| <b>
| id="B"
| "5"
#data
<table><a>1<td>2</td>3</table>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <a>
| "3"
| <table>
| <tbody>
| <tr>
| <td>
| "2"
#data
<table>A<td>B</td>C</table>
#errors
#document
| <html>
| <head>
| <body>
| "AC"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<a><svg><tr><input></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <svg svg>
| <svg tr>
| <svg input>

View File

@@ -1,31 +0,0 @@
#data
<b>1<i>2<p>3</b>4
#errors
#document
| <html>
| <head>
| <body>
| <b>
| "1"
| <i>
| "2"
| <i>
| <p>
| <b>
| "3"
| "4"
#data
<a><div><style></style><address><a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <div>
| <a>
| <style>
| <address>
| <a>
| <a>

View File

@@ -1,135 +0,0 @@
#data
FOO<!-- BAR -->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR --!>BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR -- >BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- >BAZ -->
#data
FOO<!-- BAR -- <QUX> -- MUX -->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
#data
FOO<!---->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!--->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!-->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
<?xml version="1.0">Hi
#errors
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
| "Hi"
#data
<?xml version="1.0">
#errors
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
#data
<?xml version
#errors
#document
| <!-- ?xml version -->
| <html>
| <head>
| <body>
#data
FOO<!----->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- - -->
| "BAZ"

View File

@@ -1,370 +0,0 @@
#data
<!DOCTYPE html>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!dOctYpE HtMl>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPEhtml>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE>Hello
#errors
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE >Hello
#errors
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco "ddd>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM ggg>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM taco >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM 'taco"'>Hello
#errors
#document
| <!DOCTYPE potato "" "taco"">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "taco">Hello
#errors
#document
| <!DOCTYPE potato "" "taco">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "tai'co">Hello
#errors
#document
| <!DOCTYPE potato "" "tai'co">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEMtaco "ddd">Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato grass SYSTEM taco>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIcgoof>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC goof>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "go'of">Hello
#errors
#document
| <!DOCTYPE potato "go'of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go'of'>Hello
#errors
#document
| <!DOCTYPE potato "go" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
#errors
#document
| <!DOCTYPE potato "go:hh of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
#errors
#document
| <!DOCTYPE potato "W3C-//dfdf" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">Hello
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE ...>Hello
#errors
#document
| <!DOCTYPE ...>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
<!-- internal declarations -->
]>
#errors
#document
| <!DOCTYPE root-element>
| <html>
| <head>
| <body>
| "]>"
#data
<!DOCTYPE html PUBLIC
"-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
"http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
#errors
#document
| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
#errors
#document
| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
| <html>
| <head>
| <body>
| <b>
| "Mine!"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>

View File

@@ -1,603 +0,0 @@
#data
FOO&gt;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gtBAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gt BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO> BAR"
#data
FOO&gt;;;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>;;BAR"
#data
I'm &notit; I tell you
#errors
#document
| <html>
| <head>
| <body>
| "I'm ¬it; I tell you"
#data
I'm &notin; I tell you
#errors
#document
| <html>
| <head>
| <body>
| "I'm ∉ I tell you"
#data
FOO& BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO& BAR"
#data
FOO&<BAR>
#errors
#document
| <html>
| <head>
| <body>
| "FOO&"
| <bar>
#data
FOO&&&&gt;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO&&&>BAR"
#data
FOO&#41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#X41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#BAR"
#data
FOO&#ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#ZOO"
#data
FOO&#xBAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOºR"
#data
FOO&#xZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#xZOO"
#data
FOO&#XZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#XZOO"
#data
FOO&#41BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO䆺R"
#data
FOO&#x41ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOAZOO"
#data
FOO&#x0000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#x0078;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOxZOO"
#data
FOO&#x0079;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOyZOO"
#data
FOO&#x0080;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO€ZOO"
#data
FOO&#x0081;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0082;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0083;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOƒZOO"
#data
FOO&#x0084;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO„ZOO"
#data
FOO&#x0085;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO…ZOO"
#data
FOO&#x0086;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO†ZOO"
#data
FOO&#x0087;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‡ZOO"
#data
FOO&#x0088;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOˆZOO"
#data
FOO&#x0089;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‰ZOO"
#data
FOO&#x008A;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŠZOO"
#data
FOO&#x008B;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008C;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŒZOO"
#data
FOO&#x008D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008E;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŽZOO"
#data
FOO&#x008F;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0090;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0091;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0092;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0093;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO“ZOO"
#data
FOO&#x0094;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO”ZOO"
#data
FOO&#x0095;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO•ZOO"
#data
FOO&#x0096;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0097;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO—ZOO"
#data
FOO&#x0098;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO˜ZOO"
#data
FOO&#x0099;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO™ZOO"
#data
FOO&#x009A;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOšZOO"
#data
FOO&#x009B;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009C;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOœZOO"
#data
FOO&#x009D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009E;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOžZOO"
#data
FOO&#x009F;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŸZOO"
#data
FOO&#x00A0;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO ZOO"
#data
FOO&#xD7FF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO퟿ZOO"
#data
FOO&#xD800;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xD801;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xDFFE;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xDFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xE000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x10FFFE;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􏿾ZOO"
#data
FOO&#x1087D4;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􈟔ZOO"
#data
FOO&#x10FFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􏿿ZOO"
#data
FOO&#x110000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xFFFFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"

View File

@@ -1,249 +0,0 @@
#data
<div bar="ZZ&gt;YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>YY"
#data
<div bar="ZZ&"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar='ZZ&'></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar=ZZ&></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar="ZZ&gt=YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt=YY"
#data
<div bar="ZZ&gt0YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt0YY"
#data
<div bar="ZZ&gt9YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt9YY"
#data
<div bar="ZZ&gtaYY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtaYY"
#data
<div bar="ZZ&gtZYY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtZYY"
#data
<div bar="ZZ&gt YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ> YY"
#data
<div bar="ZZ&gt"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar='ZZ&gt'></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar=ZZ&gt></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar="ZZ&pound_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod_id=23"
#data
<div bar="ZZ&pound;_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod;_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ∏_id=23"
#data
<div bar="ZZ&pound=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&pound=23"
#data
<div bar="ZZ&prod=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod=23"
#data
<div>ZZ&pound_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod_id=23"
#data
<div>ZZ&pound;_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod;_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ∏_id=23"
#data
<div>ZZ&pound=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£=23"
#data
<div>ZZ&prod=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod=23"

View File

@@ -1,246 +0,0 @@
#data
<div<div>
#errors
#document
| <html>
| <head>
| <body>
| <div<div>
#data
<div foo<bar=''>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo<bar=""
#data
<div foo=`bar`>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo="`bar`"
#data
<div \"foo=''>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| \"foo=""
#data
<a href='\nbar'></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| href="\nbar"
#data
<!DOCTYPE html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
&lang;&rang;
#errors
#document
| <html>
| <head>
| <body>
| "⟨⟩"
#data
&apos;
#errors
#document
| <html>
| <head>
| <body>
| "'"
#data
&ImaginaryI;
#errors
#document
| <html>
| <head>
| <body>
| ""
#data
&Kopf;
#errors
#document
| <html>
| <head>
| <body>
| "𝕂"
#data
&notinva;
#errors
#document
| <html>
| <head>
| <body>
| "∉"
#data
<?import namespace="foo" implementation="#bar">
#errors
#document
| <!-- ?import namespace="foo" implementation="#bar" -->
| <html>
| <head>
| <body>
#data
<!--foo--bar-->
#errors
#document
| <!-- foo--bar -->
| <html>
| <head>
| <body>
#data
<![CDATA[x]]>
#errors
#document
| <!-- [CDATA[x]] -->
| <html>
| <head>
| <body>
#data
<textarea><!--</textarea>--></textarea>
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<textarea><!--</textarea>-->
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<style><!--</style>--></style>
#errors
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<style><!--</style>-->
#errors
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<ul><li>A </li> <li>B</li></ul>
#errors
#document
| <html>
| <head>
| <body>
| <ul>
| <li>
| "A "
| " "
| <li>
| "B"
#data
<table><form><input type=hidden><input></form><div></div></table>
#errors
#document
| <html>
| <head>
| <body>
| <input>
| <div>
| <table>
| <form>
| <input>
| type="hidden"
#data
<i>A<b>B<p></i>C</b>D
#errors
#document
| <html>
| <head>
| <body>
| <i>
| "A"
| <b>
| "B"
| <b>
| <p>
| <b>
| <i>
| "C"
| "D"
#data
<div></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
#data
<svg></svg>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<math></math>
#errors
#document
| <html>
| <head>
| <body>
| <math math>

View File

@@ -1,43 +0,0 @@
#data
<button>1</foo>
#errors
#document
| <html>
| <head>
| <body>
| <button>
| "1"
#data
<foo>1<p>2</foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <p>
| "2"
#data
<dd>1</foo>
#errors
#document
| <html>
| <head>
| <body>
| <dd>
| "1"
#data
<foo>1<dd>2</foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <dd>
| "2"

View File

@@ -1,40 +0,0 @@
#data
<isindex>
#errors
#document
| <html>
| <head>
| <body>
| <form>
| <hr>
| <label>
| "This is a searchable index. Enter search keywords: "
| <input>
| name="isindex"
| <hr>
#data
<isindex name="A" action="B" prompt="C" foo="D">
#errors
#document
| <html>
| <head>
| <body>
| <form>
| action="B"
| <hr>
| <label>
| "C"
| <input>
| foo="D"
| name="isindex"
| <hr>
#data
<form><isindex>
#errors
#document
| <html>
| <head>
| <body>
| <form>

View File

@@ -1,52 +0,0 @@
#data
<input type="hidden"><frameset>
#errors
21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
31: “frameset” start tag seen.
31: End of file seen and there were open elements.
#document
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><table><caption><svg>foo</table>bar
#errors
47: End tag “table” did not match the name of the current open element (“svg”).
47: “table” closed but “caption” was still open.
47: End tag “table” seen, but there were open elements.
36: Unclosed element “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| "foo"
| "bar"
#data
<table><tr><td><svg><desc><td></desc><circle>
#errors
7: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
30: A table cell was implicitly closed, but there were open elements.
26: Unclosed element “desc”.
20: Unclosed element “svg”.
37: Stray end tag “desc”.
45: End of file seen and there were open elements.
45: Unclosed element “circle”.
7: Unclosed element “table”.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>
| <circle>

View File

@@ -1,308 +0,0 @@
#data
FOO<script>'Hello'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'Hello'"
| "BAR"
#data
FOO<script></script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script >BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/ >BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script type="text/plain"></scriptx>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "</scriptx>BAR"
#data
FOO<script></script foo=">" dd>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script>'<'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<'"
| "BAR"
#data
FOO<script>'<!'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!'"
| "BAR"
#data
FOO<script>'<!-'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-'"
| "BAR"
#data
FOO<script>'<!--'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!--'"
| "BAR"
#data
FOO<script>'<!---'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!---'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-- potato'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- potato'"
| "BAR"
#data
FOO<script>'<!-- <sCrIpt'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --'</script>BAR"
#data
FOO<script>'<!-- <sCrIpt> -->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt> -->'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --!>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -- >'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt '</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt\'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
| "QUX"

View File

@@ -1,212 +0,0 @@
#data
<table><th>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <th>
#data
<table><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><col foo='bar'>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <colgroup>
| <col>
| foo="bar"
#data
<table><colgroup></html>foo
#errors
#document
| <html>
| <head>
| <body>
| "foo"
| <table>
| <colgroup>
#data
<table></table><p>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <p>
| "foo"
#data
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><select><option>3</select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "3"
| <table>
#data
<table><select><table></table></select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <table>
| <table>
#data
<table><select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <table>
#data
<table><select><option>A<tr><td>B</td></tr></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "A"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<table><td></body></caption></col></colgroup></html>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td>A</table>B
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
| "B"
#data
<table><tr><caption>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <caption>
#data
<table><tr></body></caption></col></colgroup></html></td></th><td>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td><tr>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <tr>
#data
<table><td><button><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <button>
| <td>
#data
<table><tr><td><svg><desc><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>

File diff suppressed because it is too large Load Diff

View File

@@ -1,799 +0,0 @@
#data
<!DOCTYPE html><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><svg></svg><![CDATA[a]]>
#errors
29: Bogus comment
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <!-- [CDATA[a]] -->
#data
<!DOCTYPE html><body><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><body><select><svg></svg></select>
#errors
35: Stray “svg” start tag.
42: Stray end tag “svg”
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><body><select><option><svg></svg></option></select>
#errors
43: Stray “svg” start tag.
50: Stray end tag “svg”
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
#data
<!DOCTYPE html><body><table><svg></svg></table>
#errors
34: Start tag “svg” seen in “table”.
41: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
#errors
34: Start tag “svg” seen in “table”.
46: Stray end tag “g”.
53: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
#errors
34: Start tag “svg” seen in “table”.
46: Stray end tag “g”.
58: Stray end tag “g”.
65: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
#data
<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
#errors
41: Start tag “svg” seen in “table”.
53: Stray end tag “g”.
65: Stray end tag “g”.
72: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
#data
<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
#errors
45: Start tag “svg” seen in “table”.
57: Stray end tag “g”.
69: Stray end tag “g”.
76: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
70: HTML start tag “p” in a foreign namespace context.
81: “table” closed but “caption” was still open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
#errors
78: “table” closed but “caption” was still open.
78: Unclosed elements on stack.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
44: Start tag “svg” seen in “table”.
56: Stray end tag “g”.
68: Stray end tag “g”.
71: HTML start tag “p” in a foreign namespace context.
71: Start tag “p” seen in “table”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <table>
| <colgroup>
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
50: Stray “svg” start tag.
54: Stray “g” start tag.
62: Stray end tag “g”
66: Stray “g” start tag.
74: Stray end tag “g”
77: Stray “p” start tag.
88: “table” end tag with “select” open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| "foobarbaz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
36: Start tag “select” seen in “table”.
42: Stray “svg” start tag.
46: Stray “g” start tag.
54: Stray end tag “g”
58: Stray “g” start tag.
66: Stray end tag “g”
69: Stray “p” start tag.
80: “table” end tag with “select” open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "foobarbaz"
| <table>
| <p>
| "quux"
#data
<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
#errors
41: Stray “svg” start tag.
68: HTML start tag “p” in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
#errors
34: Stray “svg” start tag.
61: HTML start tag “p” in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
#errors
31: Stray “svg” start tag.
35: Stray “g” start tag.
40: Stray end tag “g”
44: Stray “g” start tag.
49: Stray end tag “g”
52: Stray “p” start tag.
58: Stray “span” start tag.
58: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
#errors
42: Stray “svg” start tag.
46: Stray “g” start tag.
51: Stray end tag “g”
55: Stray “g” start tag.
60: Stray end tag “g”
63: Stray “p” start tag.
69: Stray “span” start tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| <svg svg>
| xlink href="foo"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
| "bar"
#data
<svg></path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<div><svg></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| "a"
#data
<div><svg><path></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| "a"
#data
<div><svg><path></svg><path>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <path>
#data
<div><svg><path><foreignObject><math></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <math math>
| "a"
#data
<div><svg><path><foreignObject><p></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| "a"
#data
<!DOCTYPE html><svg><desc><div><svg><ul>a
#errors
40: HTML start tag “ul” in a foreign namespace context.
41: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <div>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><svg><desc><svg><ul>a
#errors
35: HTML start tag “ul” in a foreign namespace context.
36: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><p><svg><desc><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg desc>
| <p>
#data
<!DOCTYPE html><p><svg><title><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg title>
| <p>
#data
<div><svg><path><foreignObject><p></foreignObject><p>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| <p>
#data
<math><mi><div><object><div><span></span></div></object></div></mi><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <div>
| <object>
| <div>
| <span>
| <math mi>
#data
<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <svg svg>
| <svg foreignObject>
| <div>
| <div>
| <math mi>
#data
<svg><script></script><path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg script>
| <svg path>
#data
<table><svg></svg><tr>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <table>
| <tbody>
| <tr>
#data
<math><mi><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math mglyph>
#data
<math><mi><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math malignmark>
#data
<math><mo><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math mglyph>
#data
<math><mo><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math malignmark>
#data
<math><mn><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math mglyph>
#data
<math><mn><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math malignmark>
#data
<math><ms><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math mglyph>
#data
<math><ms><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math malignmark>
#data
<math><mtext><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math mglyph>
#data
<math><mtext><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math malignmark>
#data
<math><annotation-xml><svg></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <div>
| <math math>
| <math mi>
| <span>
| <svg path>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <math math>
| <math mi>
| <svg svg>
| <math mo>
| <span>
| <svg path>
| <math mi>

View File

@@ -1,482 +0,0 @@
#data
<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' DIFFUSECONSTANT='' EDGEMODE='' EXTERNALRESOURCESREQUIRED='' FILTERRES='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' contentscripttype='' contentstyletype='' diffuseconstant='' edgemode='' externalresourcesrequired='' filterres='' filterunits='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| attributeName=""
| attributeType=""
| baseFrequency=""
| baseProfile=""
| calcMode=""
| clipPathUnits=""
| contentScriptType=""
| contentStyleType=""
| diffuseConstant=""
| edgeMode=""
| externalResourcesRequired=""
| filterRes=""
| filterUnits=""
| glyphRef=""
| gradientTransform=""
| gradientUnits=""
| kernelMatrix=""
| kernelUnitLength=""
| keyPoints=""
| keySplines=""
| keyTimes=""
| lengthAdjust=""
| limitingConeAngle=""
| markerHeight=""
| markerUnits=""
| markerWidth=""
| maskContentUnits=""
| maskUnits=""
| numOctaves=""
| pathLength=""
| patternContentUnits=""
| patternTransform=""
| patternUnits=""
| pointsAtX=""
| pointsAtY=""
| pointsAtZ=""
| preserveAlpha=""
| preserveAspectRatio=""
| primitiveUnits=""
| refX=""
| refY=""
| repeatCount=""
| repeatDur=""
| requiredExtensions=""
| requiredFeatures=""
| specularConstant=""
| specularExponent=""
| spreadMethod=""
| startOffset=""
| stdDeviation=""
| stitchTiles=""
| surfaceScale=""
| systemLanguage=""
| tableValues=""
| targetX=""
| targetY=""
| textLength=""
| viewBox=""
| viewTarget=""
| xChannelSelector=""
| yChannelSelector=""
| zoomAndPan=""
#data
<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| attributename=""
| attributetype=""
| basefrequency=""
| baseprofile=""
| calcmode=""
| clippathunits=""
| contentscripttype=""
| contentstyletype=""
| diffuseconstant=""
| edgemode=""
| externalresourcesrequired=""
| filterres=""
| filterunits=""
| glyphref=""
| gradienttransform=""
| gradientunits=""
| kernelmatrix=""
| kernelunitlength=""
| keypoints=""
| keysplines=""
| keytimes=""
| lengthadjust=""
| limitingconeangle=""
| markerheight=""
| markerunits=""
| markerwidth=""
| maskcontentunits=""
| maskunits=""
| numoctaves=""
| pathlength=""
| patterncontentunits=""
| patterntransform=""
| patternunits=""
| pointsatx=""
| pointsaty=""
| pointsatz=""
| preservealpha=""
| preserveaspectratio=""
| primitiveunits=""
| refx=""
| refy=""
| repeatcount=""
| repeatdur=""
| requiredextensions=""
| requiredfeatures=""
| specularconstant=""
| specularexponent=""
| spreadmethod=""
| startoffset=""
| stddeviation=""
| stitchtiles=""
| surfacescale=""
| systemlanguage=""
| tablevalues=""
| targetx=""
| targety=""
| textlength=""
| viewbox=""
| viewtarget=""
| xchannelselector=""
| ychannelselector=""
| zoomandpan=""
#data
<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg altGlyph>
| <svg altGlyphDef>
| <svg altGlyphItem>
| <svg animateColor>
| <svg animateMotion>
| <svg animateTransform>
| <svg clipPath>
| <svg feBlend>
| <svg feColorMatrix>
| <svg feComponentTransfer>
| <svg feComposite>
| <svg feConvolveMatrix>
| <svg feDiffuseLighting>
| <svg feDisplacementMap>
| <svg feDistantLight>
| <svg feFlood>
| <svg feFuncA>
| <svg feFuncB>
| <svg feFuncG>
| <svg feFuncR>
| <svg feGaussianBlur>
| <svg feImage>
| <svg feMerge>
| <svg feMergeNode>
| <svg feMorphology>
| <svg feOffset>
| <svg fePointLight>
| <svg feSpecularLighting>
| <svg feSpotLight>
| <svg feTile>
| <svg feTurbulence>
| <svg foreignObject>
| <svg glyphRef>
| <svg linearGradient>
| <svg radialGradient>
| <svg textPath>
#data
<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math altglyph>
| <math altglyphdef>
| <math altglyphitem>
| <math animatecolor>
| <math animatemotion>
| <math animatetransform>
| <math clippath>
| <math feblend>
| <math fecolormatrix>
| <math fecomponenttransfer>
| <math fecomposite>
| <math feconvolvematrix>
| <math fediffuselighting>
| <math fedisplacementmap>
| <math fedistantlight>
| <math feflood>
| <math fefunca>
| <math fefuncb>
| <math fefuncg>
| <math fefuncr>
| <math fegaussianblur>
| <math feimage>
| <math femerge>
| <math femergenode>
| <math femorphology>
| <math feoffset>
| <math fepointlight>
| <math fespecularlighting>
| <math fespotlight>
| <math fetile>
| <math feturbulence>
| <math foreignobject>
| <math glyphref>
| <math lineargradient>
| <math radialgradient>
| <math textpath>
#data
<!DOCTYPE html><body><svg><solidColor /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg solidcolor>

View File

@@ -1,62 +0,0 @@
#data
<!DOCTYPE html><body><p>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "foo"
| <math math>
| <math mtext>
| <i>
| "baz"
| <math annotation-xml>
| <svg svg>
| <svg desc>
| <b>
| "eggs"
| <svg g>
| <svg foreignObject>
| <p>
| "spam"
| <table>
| <tbody>
| <tr>
| <td>
| <img>
| <svg g>
| "quux"
| "bar"
#data
<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject><P>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "foo"
| <math math>
| <math mtext>
| <i>
| "baz"
| <math annotation-xml>
| <svg svg>
| <svg desc>
| <b>
| "eggs"
| <svg g>
| <svg foreignObject>
| <p>
| "spam"
| <table>
| <tbody>
| <tr>
| <td>
| <img>
| <svg g>
| "quux"
| "bar"

View File

@@ -1,74 +0,0 @@
#data
<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <xyz:abc>
#data
<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <xyz:abc>
| <span>
#data
<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
#errors
15: Unexpected start tag html
#document
| <!DOCTYPE html>
| <html>
| abc:def="gh"
| <head>
| <body>
| <xyz:abc>
#data
<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
#errors
15: Unexpected start tag html
#document
| <!DOCTYPE html>
| <html>
| xml:lang="bar"
| <head>
| <body>
#data
<!DOCTYPE html><html 123=456>
#errors
#document
| <!DOCTYPE html>
| <html>
| 123="456"
| <head>
| <body>
#data
<!DOCTYPE html><html 123=456><html 789=012>
#errors
#document
| <!DOCTYPE html>
| <html>
| 123="456"
| 789="012"
| <head>
| <body>
#data
<!DOCTYPE html><html><body 789=012>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| 789="012"

View File

@@ -1,208 +0,0 @@
#data
<!DOCTYPE html><p><b><i><u></p> <p>X
#errors
Line: 1 Col: 31 Unexpected end tag (p). Ignored.
Line: 1 Col: 36 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <b>
| <i>
| <u>
| <b>
| <i>
| <u>
| " "
| <p>
| "X"
#data
<p><b><i><u></p>
<p>X
#errors
Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected end tag (p). Ignored.
Line: 2 Col: 4 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| <i>
| <u>
| <b>
| <i>
| <u>
| "
"
| <p>
| "X"
#data
<!doctype html></html> <head>
#errors
Line: 1 Col: 22 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " "
#data
<!doctype html></body><meta>
#errors
Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <meta>
#data
<html></html><!-- foo -->
#errors
Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
Line: 1 Col: 13 Unexpected end tag (html) after the (implied) root element.
#document
| <html>
| <head>
| <body>
| <!-- foo -->
#data
<!doctype html></body><title>X</title>
#errors
Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
#data
<!doctype html><table> X<meta></table>
#errors
Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 30 Unexpected start tag (meta) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " X"
| <meta>
| <table>
#data
<!doctype html><table> x</table>
#errors
Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " x"
| <table>
#data
<!doctype html><table> x </table>
#errors
Line: 1 Col: 25 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " x "
| <table>
#data
<!doctype html><table><tr> x</table>
#errors
Line: 1 Col: 28 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| " x"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table>X<style> <tr>x </style> </table>
#errors
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <table>
| <style>
| " <tr>x "
| " "
#data
<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div>
#errors
Line: 1 Col: 30 Unexpected start tag (a) in table context caused voodoo mode.
Line: 1 Col: 37 Unexpected end tag (a) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <div>
| <a>
| "foo"
| <table>
| " "
| <tbody>
| <tr>
| <td>
| "bar"
| " "
#data
<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>
#errors
6: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
13: Stray start tag “frame”.
21: Stray end tag “frame”.
29: Stray end tag “frame”.
39: “frameset” start tag after “body” already open.
105: End of file seen inside an [R]CDATA element.
105: End of file seen and there were open elements.
XXX: These errors are wrong, please fix me!
#document
| <html>
| <head>
| <frameset>
| <frame>
| <frameset>
| <frame>
| <noframes>
| "</frameset><noframes>"
#data
<!DOCTYPE html><object></html>
#errors
1: Expected closing tag. Unexpected end of file
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <object>

File diff suppressed because it is too large Load Diff

View File

@@ -1,153 +0,0 @@
#data
<!doctype html><table><tbody><select><tr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table><tr><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <table>
| <tbody>
| <tr>
| <td>
#data
<!doctype html><table><tr><td><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| <td>
#data
<!doctype html><table><tr><th><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <th>
| <select>
| <td>
#data
<!doctype html><table><caption><select><tr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <select>
| <tbody>
| <tr>
#data
<!doctype html><select><tr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><th>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><tbody>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><thead>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><tfoot>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><select><caption>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!doctype html><table><tr></table>a
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| "a"

View File

@@ -1,269 +0,0 @@
#data
<!doctype html><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
#data
<!doctype html><table><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
#data
<!doctype html><table><tbody><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
| <tbody>
#data
<!doctype html><table><tbody><tr><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table><tbody><tr><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><table><td><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <plaintext>
| "</plaintext>"
#data
<!doctype html><table><caption><plaintext></plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <plaintext>
| "</plaintext>"
#data
<!doctype html><table><tr><style></script></style>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "abc"
| <table>
| <tbody>
| <tr>
| <style>
| "</script>"
#data
<!doctype html><table><tr><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "abc"
| <table>
| <tbody>
| <tr>
| <script>
| "</style>"
#data
<!doctype html><table><caption><style></script></style>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <style>
| "</script>"
| "abc"
#data
<!doctype html><table><td><style></script></style>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <style>
| "</script>"
| "abc"
#data
<!doctype html><select><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <script>
| "</style>"
| "abc"
#data
<!doctype html><table><select><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <script>
| "</style>"
| "abc"
| <table>
#data
<!doctype html><table><tr><select><script></style></script>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <script>
| "</style>"
| "abc"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><frameset></frameset><noframes>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
#data
<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc-->
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
| <!-- abc -->
#data
<!doctype html><frameset></frameset></html><noframes>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
#data
<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc-->
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
| <noframes>
| "abc"
| <!-- abc -->
#data
<!doctype html><table><tr></tbody><tfoot>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <tfoot>
#data
<!doctype html><table><td><svg></svg>abc<td>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| "abc"
| <td>

File diff suppressed because it is too large Load Diff

View File

@@ -1,763 +0,0 @@
#data
<!DOCTYPE html>Test
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Test"
#data
<textarea>test</div>test
#errors
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
Line: 1 Col: 24 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <textarea>
| "test</div>test"
#data
<table><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 11 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><td>test</tbody></table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "test"
#data
<frame>test
#errors
Line: 1 Col: 7 Unexpected start tag (frame). Expected DOCTYPE.
Line: 1 Col: 7 Unexpected start tag frame. Ignored.
#document
| <html>
| <head>
| <body>
| "test"
#data
<!DOCTYPE html><frameset>test
#errors
Line: 1 Col: 29 Unepxected characters in the frameset phase. Characters ignored.
Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset><!DOCTYPE html>
#errors
Line: 1 Col: 40 Unexpected DOCTYPE. Ignored.
Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><font><p><b>test</font>
#errors
Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <font>
| <p>
| <font>
| <b>
| "test"
#data
<!DOCTYPE html><dt><div><dd>
#errors
Line: 1 Col: 28 Missing end tag (div, dt).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <dt>
| <div>
| <dd>
#data
<script></x
#errors
Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
#document
| <html>
| <head>
| <script>
| "</x"
| <body>
#data
<table><plaintext><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 18 Unexpected start tag (plaintext) in table context caused voodoo mode.
Line: 1 Col: 22 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <plaintext>
| "<td>"
| <table>
#data
<plaintext></plaintext>
#errors
Line: 1 Col: 11 Unexpected start tag (plaintext). Expected DOCTYPE.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <plaintext>
| "</plaintext>"
#data
<!DOCTYPE html><table><tr>TEST
#errors
Line: 1 Col: 30 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 30 Unexpected end of file. Expected table content.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "TEST"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>
#errors
Line: 1 Col: 37 Unexpected start tag (body).
Line: 1 Col: 53 Unexpected start tag (body).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| t1="1"
| t2="2"
| t3="3"
| t4="4"
#data
</b test
#errors
Line: 1 Col: 8 Unexpected end of file in attribute name.
Line: 1 Col: 8 End tag contains unexpected attributes.
Line: 1 Col: 8 Unexpected end tag (b). Expected DOCTYPE.
Line: 1 Col: 8 Unexpected end tag (b) after the (implied) root element.
#document
| <html>
| <head>
| <body>
#data
<!DOCTYPE html></b test<b &=&amp>X
#errors
Line: 1 Col: 32 Named entity didn't end with ';'.
Line: 1 Col: 33 End tag contains unexpected attributes.
Line: 1 Col: 33 Unexpected end tag (b) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
#data
<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 54 Unexpected end of file in the tag name.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <script>
| type="text/x-foobar;baz"
| "X</SCRipt"
| <body>
#data
&
#errors
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&"
#data
&#
#errors
Line: 1 Col: 1 Numeric entity expected. Got end of file instead.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#"
#data
&#X
#errors
Line: 1 Col: 3 Numeric entity expected but none found.
Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#X"
#data
&#x
#errors
Line: 1 Col: 3 Numeric entity expected but none found.
Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&#x"
#data
&#45
#errors
Line: 1 Col: 4 Numeric entity didn't end with ';'.
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "-"
#data
&x-test
#errors
Line: 1 Col: 1 Named entity expected. Got none.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&x-test"
#data
<!doctypehtml><p><li>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <li>
#data
<!doctypehtml><p><dt>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <dt>
#data
<!doctypehtml><p><dd>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <dd>
#data
<!doctypehtml><p><form>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <form>
#data
<!DOCTYPE html><p></P>X
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| "X"
#data
&AMP
#errors
Line: 1 Col: 4 Named entity didn't end with ';'.
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&"
#data
&AMp;
#errors
Line: 1 Col: 1 Named entity expected. Got none.
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "&AMp;"
#data
<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
#errors
Line: 1 Col: 110 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
#data
<!DOCTYPE html>X</body>X
#errors
Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "XX"
#data
<!DOCTYPE html><!-- X
#errors
Line: 1 Col: 21 Unexpected end of file in comment.
#document
| <!DOCTYPE html>
| <!-- X -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><table><caption>test TEST</caption><td>test
#errors
Line: 1 Col: 54 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| "test TEST"
| <tbody>
| <tr>
| <td>
| "test"
#data
<!DOCTYPE html><select><option><optgroup>
#errors
Line: 1 Col: 41 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
| <optgroup>
#data
<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
#errors
Line: 1 Col: 68 Unexpected select start tag in the select phase treated as select end tag.
Line: 1 Col: 76 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
| <option>
| <option>
| <option>
#data
<!DOCTYPE html><select><optgroup><option><optgroup>
#errors
Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
| <option>
| <optgroup>
#data
<!DOCTYPE html><datalist><option>foo</datalist>bar
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <datalist>
| <option>
| "foo"
| "bar"
#data
<!DOCTYPE html><font><input><input></font>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <font>
| <input>
| <input>
#data
<!DOCTYPE html><!-- XXX - XXX -->
#errors
#document
| <!DOCTYPE html>
| <!-- XXX - XXX -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><!-- XXX - XXX
#errors
Line: 1 Col: 29 Unexpected end of file in comment (-)
#document
| <!DOCTYPE html>
| <!-- XXX - XXX -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><!-- XXX - XXX - XXX -->
#errors
#document
| <!DOCTYPE html>
| <!-- XXX - XXX - XXX -->
| <html>
| <head>
| <body>
#data
<isindex test=x name=x>
#errors
Line: 1 Col: 23 Unexpected start tag (isindex). Expected DOCTYPE.
Line: 1 Col: 23 Unexpected start tag isindex. Don't use it!
#document
| <html>
| <head>
| <body>
| <form>
| <hr>
| <label>
| "This is a searchable index. Enter search keywords: "
| <input>
| name="isindex"
| test="x"
| <hr>
#data
test
test
#errors
Line: 2 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "test
test"
#data
<!DOCTYPE html><body><title>test</body></title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "test</body>"
#data
<!DOCTYPE html><body><title>X</title><meta name=z><link rel=foo><style>
x { content:"</style" } </style>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
| <meta>
| name="z"
| <link>
| rel="foo"
| <style>
| "
x { content:"</style" } "
#data
<!DOCTYPE html><select><optgroup></optgroup></select>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <optgroup>
#data
#errors
Line: 2 Col: 1 Unexpected End of file. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
#data
<!DOCTYPE html> <html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><script>
</script> <title>x</title> </head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <script>
| "
"
| " "
| <title>
| "x"
| " "
| <body>
#data
<!DOCTYPE html><html><body><html id=x>
#errors
Line: 1 Col: 38 html needs to be the first start tag.
#document
| <!DOCTYPE html>
| <html>
| id="x"
| <head>
| <body>
#data
<!DOCTYPE html>X</body><html id="x">
#errors
Line: 1 Col: 36 Unexpected start tag token (html) in the after body phase.
Line: 1 Col: 36 html needs to be the first start tag.
#document
| <!DOCTYPE html>
| <html>
| id="x"
| <head>
| <body>
| "X"
#data
<!DOCTYPE html><head><html id=x>
#errors
Line: 1 Col: 32 html needs to be the first start tag.
#document
| <!DOCTYPE html>
| <html>
| id="x"
| <head>
| <body>
#data
<!DOCTYPE html>X</html>X
#errors
Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "XX"
#data
<!DOCTYPE html>X</html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X "
#data
<!DOCTYPE html>X</html><p>X
#errors
Line: 1 Col: 26 Unexpected start tag (p).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <p>
| "X"
#data
<!DOCTYPE html>X<p/x/y/z>
#errors
Line: 1 Col: 19 Expected a > after the /.
Line: 1 Col: 21 Solidus (/) incorrectly placed in tag.
Line: 1 Col: 23 Solidus (/) incorrectly placed in tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <p>
| x=""
| y=""
| z=""
#data
<!DOCTYPE html><!--x--
#errors
Line: 1 Col: 22 Unexpected end of file in comment (--).
#document
| <!DOCTYPE html>
| <!-- x -->
| <html>
| <head>
| <body>
#data
<!DOCTYPE html><table><tr><td></p></table>
#errors
Line: 1 Col: 34 Unexpected end tag (p). Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <p>
#data
<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
#errors
Line: 1 Col: 20 Expected space or '>'. Got ''
Line: 1 Col: 25 Erroneous DOCTYPE.
Line: 1 Col: 35 Unexpected character in comment found.
#document
| <!DOCTYPE <!doctype>
| <html>
| <head>
| <body>
| ">"
| <!-- <!--x -->
| "-->"
#data
<!doctype html><div><form></form><div></div></div>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <div>
| <form>
| <div>

View File

@@ -1,455 +0,0 @@
#data
<!doctype html><p><button><button>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <button>
#data
<!doctype html><p><button><address>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <address>
#data
<!doctype html><p><button><blockquote>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <blockquote>
#data
<!doctype html><p><button><menu>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <menu>
#data
<!doctype html><p><button><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <p>
#data
<!doctype html><p><button><ul>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <ul>
#data
<!doctype html><p><button><h1>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <h1>
#data
<!doctype html><p><button><h6>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <h6>
#data
<!doctype html><p><button><listing>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <listing>
#data
<!doctype html><p><button><pre>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <pre>
#data
<!doctype html><p><button><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <form>
#data
<!doctype html><p><button><li>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <li>
#data
<!doctype html><p><button><dd>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <dd>
#data
<!doctype html><p><button><dt>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <dt>
#data
<!doctype html><p><button><plaintext>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <plaintext>
#data
<!doctype html><p><button><table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <table>
#data
<!doctype html><p><button><hr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <hr>
#data
<!doctype html><p><button><xmp>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <xmp>
#data
<!doctype html><p><button></p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <button>
| <p>
#data
<!doctype html><address><button></address>a
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <address>
| <button>
| "a"
#data
<!doctype html><address><button></address>a
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <address>
| <button>
| "a"
#data
<p><table></p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <p>
| <table>
#data
<!doctype html><svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!doctype html><p><figcaption>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <figcaption>
#data
<!doctype html><p><summary>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <summary>
#data
<!doctype html><form><table><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <form>
| <table>
#data
<!doctype html><table><form><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <form>
#data
<!doctype html><table><form></table><form>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <form>
#data
<!doctype html><svg><foreignObject><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg foreignObject>
| <p>
#data
<!doctype html><svg><title>abc
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| "abc"
#data
<option><span><option>
#errors
#document
| <html>
| <head>
| <body>
| <option>
| <span>
| <option>
#data
<option><option>
#errors
#document
| <html>
| <head>
| <body>
| <option>
| <option>
#data
<math><annotation-xml><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <div>
#data
<math><annotation-xml encoding="application/svg+xml"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="application/svg+xml"
| <div>
#data
<math><annotation-xml encoding="application/xhtml+xml"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="application/xhtml+xml"
| <div>
#data
<math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="aPPlication/xhtmL+xMl"
| <div>
#data
<math><annotation-xml encoding="text/html"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="text/html"
| <div>
#data
<math><annotation-xml encoding="Text/htmL"><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding="Text/htmL"
| <div>
#data
<math><annotation-xml encoding=" text/html "><div>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| encoding=" text/html "
| <div>

View File

@@ -1,221 +0,0 @@
#data
<svg><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "foo"
#data
<math><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| "foo"
#data
<div><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <!-- [CDATA[foo]] -->
#data
<svg><![CDATA[foo
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "foo"
#data
<svg><![CDATA[foo
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "foo"
#data
<svg><![CDATA[
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<svg><![CDATA[]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<svg><![CDATA[]] >]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]] >"
#data
<svg><![CDATA[]] >]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]] >"
#data
<svg><![CDATA[]]
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]]"
#data
<svg><![CDATA[]
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]"
#data
<svg><![CDATA[]>a
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "]>a"
#data
<svg><foreignObject><div><![CDATA[foo]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg foreignObject>
| <div>
| <!-- [CDATA[foo]] -->
#data
<svg><![CDATA[<svg>]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
#data
<svg><![CDATA[</svg>a]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "</svg>a"
#data
<svg><![CDATA[<svg>a
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>a"
#data
<svg><![CDATA[</svg>a
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "</svg>a"
#data
<svg><![CDATA[<svg>]]><path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
| <svg path>
#data
<svg><![CDATA[<svg>]]></path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
#data
<svg><![CDATA[<svg>]]><!--path-->
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>"
| <!-- path -->
#data
<svg><![CDATA[<svg>]]>path
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<svg>path"
#data
<svg><![CDATA[<!--svg-->]]>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| "<!--svg-->"

View File

@@ -1,157 +0,0 @@
#data
<a><b><big><em><strong><div>X</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <big>
| <em>
| <strong>
| <big>
| <em>
| <strong>
| <div>
| <a>
| "X"
#data
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <div>
| id="1"
| <a>
| <div>
| id="2"
| <a>
| <div>
| id="3"
| <a>
| <div>
| id="4"
| <a>
| <div>
| id="5"
| <a>
| <div>
| id="6"
| <a>
| <div>
| id="7"
| <a>
| <div>
| id="8"
| <a>
| "A"
#data
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <div>
| id="1"
| <a>
| <div>
| id="2"
| <a>
| <div>
| id="3"
| <a>
| <div>
| id="4"
| <a>
| <div>
| id="5"
| <a>
| <div>
| id="6"
| <a>
| <div>
| id="7"
| <a>
| <div>
| id="8"
| <a>
| <div>
| id="9"
| "A"
#data
<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <div>
| id="1"
| <a>
| <div>
| id="2"
| <a>
| <div>
| id="3"
| <a>
| <div>
| id="4"
| <a>
| <div>
| id="5"
| <a>
| <div>
| id="6"
| <a>
| <div>
| id="7"
| <a>
| <div>
| id="8"
| <a>
| <div>
| id="9"
| <div>
| id="10"
| "A"
#data
<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST
#errors
Line: 1 Col: 6 Unexpected start tag (cite). Expected DOCTYPE.
Line: 1 Col: 46 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 50 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <cite>
| <b>
| <cite>
| <i>
| <cite>
| <i>
| <cite>
| <i>
| <i>
| <i>
| <div>
| <b>
| "X"
| "TEST"

View File

@@ -1,155 +0,0 @@
#data
<p><font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red><p>X
#errors
3: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
116: Unclosed elements.
117: End of file seen and there were open elements.
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| color="red"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| color="red"
| <p>
| <font>
| color="red"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| color="red"
| "X"
#data
<p><font size=4><font size=4><font size=4><font size=4><p>X
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| "X"
#data
<p><font size=4><font size=4><font size=4><font size="5"><font size=4><p>X
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="5"
| <font>
| size="4"
| <p>
| <font>
| size="4"
| <font>
| size="4"
| <font>
| size="5"
| <font>
| size="4"
| "X"
#data
<p><font size=4 id=a><font size=4 id=b><font size=4><font size=4><p>X
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <font>
| id="a"
| size="4"
| <font>
| id="b"
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| <p>
| <font>
| id="a"
| size="4"
| <font>
| id="b"
| size="4"
| <font>
| size="4"
| <font>
| size="4"
| "X"
#data
<p><b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object><p>Y
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| id="a"
| <b>
| id="a"
| <b>
| id="a"
| <b>
| <object>
| <b>
| id="a"
| <b>
| id="a"
| "X"
| <p>
| <b>
| id="a"
| <b>
| id="a"
| <b>
| id="a"
| <b>
| "Y"

View File

@@ -1,79 +0,0 @@
#data
<!DOCTYPE html>&NotEqualTilde;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "≂̸"
#data
<!DOCTYPE html>&NotEqualTilde;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "≂̸A"
#data
<!DOCTYPE html>&ThickSpace;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| ""
#data
<!DOCTYPE html>&ThickSpace;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "A"
#data
<!DOCTYPE html>&NotSubset;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "⊂⃒"
#data
<!DOCTYPE html>&NotSubset;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "⊂⃒A"
#data
<!DOCTYPE html>&Gopf;
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "𝔾"
#data
<!DOCTYPE html>&Gopf;A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "𝔾A"

View File

@@ -1,219 +0,0 @@
#data
<!DOCTYPE html><body><foo>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <foo>
| "A"
#data
<!DOCTYPE html><body><area>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <area>
| "A"
#data
<!DOCTYPE html><body><base>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <base>
| "A"
#data
<!DOCTYPE html><body><basefont>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <basefont>
| "A"
#data
<!DOCTYPE html><body><bgsound>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <bgsound>
| "A"
#data
<!DOCTYPE html><body><br>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <br>
| "A"
#data
<!DOCTYPE html><body><col>A
#errors
26: Stray start tag “col”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "A"
#data
<!DOCTYPE html><body><command>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <command>
| "A"
#data
<!DOCTYPE html><body><embed>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <embed>
| "A"
#data
<!DOCTYPE html><body><frame>A
#errors
26: Stray start tag “frame”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "A"
#data
<!DOCTYPE html><body><hr>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <hr>
| "A"
#data
<!DOCTYPE html><body><img>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <img>
| "A"
#data
<!DOCTYPE html><body><input>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <input>
| "A"
#data
<!DOCTYPE html><body><keygen>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <keygen>
| "A"
#data
<!DOCTYPE html><body><link>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <link>
| "A"
#data
<!DOCTYPE html><body><meta>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <meta>
| "A"
#data
<!DOCTYPE html><body><param>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <param>
| "A"
#data
<!DOCTYPE html><body><source>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <source>
| "A"
#data
<!DOCTYPE html><body><track>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <track>
| "A"
#data
<!DOCTYPE html><body><wbr>A
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <wbr>
| "A"

View File

@@ -1,313 +0,0 @@
#data
<!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><br><a href='#2'><nobr>2<nobr></a><br><a href='#3'><nobr>3<nobr></a>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <a>
| href="#1"
| <nobr>
| "1"
| <nobr>
| <nobr>
| <br>
| <a>
| href="#2"
| <a>
| href="#2"
| <nobr>
| "2"
| <nobr>
| <nobr>
| <br>
| <a>
| href="#3"
| <a>
| href="#3"
| <nobr>
| "3"
| <nobr>
#data
<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
| <table>
#data
<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <table>
| <tbody>
| <tr>
| <td>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <div>
| <b>
| <nobr>
| <nobr>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <div>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
| <nobr>
| <nobr>
| "3"
#data
<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <nobr>
| <ins>
| <nobr>
| <i>
| <i>
| <nobr>
#data
<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| <nobr>
| "1"
| <ins>
| <nobr>
| <nobr>
| <i>
| "2"
#data
<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <b>
| "1"
| <nobr>
| <nobr>
| <i>
| <i>
| <nobr>
| "2"
#data
<p><code x</code></p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <code>
| code=""
| x<=""
| <code>
| code=""
| x<=""
| "
"
#data
<!DOCTYPE html><svg><foreignObject><p><i></p>a
#errors
45: End tag “p” seen, but there were open elements.
41: Unclosed element “i”.
46: End of file seen and there were open elements.
35: Unclosed element “foreignObject”.
20: Unclosed element “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg foreignObject>
| <p>
| <i>
| <i>
| "a"
#data
<!DOCTYPE html><table><tr><td><svg><foreignObject><p><i></p>a
#errors
56: End tag “p” seen, but there were open elements.
52: Unclosed element “i”.
57: End of file seen and there were open elements.
46: Unclosed element “foreignObject”.
31: Unclosed element “svg”.
22: Unclosed element “table”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg foreignObject>
| <p>
| <i>
| <i>
| "a"
#data
<!DOCTYPE html><math><mtext><p><i></p>a
#errors
38: End tag “p” seen, but there were open elements.
34: Unclosed element “i”.
39: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <p>
| <i>
| <i>
| "a"
#data
<!DOCTYPE html><table><tr><td><math><mtext><p><i></p>a
#errors
53: End tag “p” seen, but there were open elements.
49: Unclosed element “i”.
54: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <math math>
| <math mtext>
| <p>
| <i>
| <i>
| "a"
#data
<!DOCTYPE html><body><div><!/div>a
#errors
29: Bogus comment.
34: End of file seen and there were open elements.
26: Unclosed element “div”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <div>
| <!-- /div -->
| "a"

View File

@@ -1,305 +0,0 @@
#data
<head></head><style></style>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected start tag (style) that can be in head. Moved.
#document
| <html>
| <head>
| <style>
| <body>
#data
<head></head><script></script>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 21 Unexpected start tag (script) that can be in head. Moved.
#document
| <html>
| <head>
| <script>
| <body>
#data
<head></head><!-- --><style></style><!-- --><script></script>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
Line: 1 Col: 28 Unexpected start tag (style) that can be in head. Moved.
#document
| <html>
| <head>
| <style>
| <script>
| <!-- -->
| <!-- -->
| <body>
#data
<head></head><!-- -->x<style></style><!-- --><script></script>
#errors
Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
#document
| <html>
| <head>
| <!-- -->
| <body>
| "x"
| <style>
| <!-- -->
| <script>
#data
<!DOCTYPE html><html><head></head><body><pre>
</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
#data
<!DOCTYPE html><html><head></head><body><pre>
foo</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "foo"
#data
<!DOCTYPE html><html><head></head><body><pre>
foo</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "
foo"
#data
<!DOCTYPE html><html><head></head><body><pre>
foo
</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "foo
"
#data
<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
</span></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "x"
| <span>
| "
"
#data
<!DOCTYPE html><html><head></head><body><pre>x
y</pre></body></html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "x
y"
#data
<!DOCTYPE html><html><head></head><body><pre>x<div>
y</pre></body></html>
#errors
Line: 2 Col: 7 End tag (pre) seen too early. Expected other end tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "x"
| <div>
| "
y"
#data
<!DOCTYPE html><pre>&#x0a;&#x0a;A</pre>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <pre>
| "
A"
#data
<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
#errors
Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <meta>
| <body>
#data
<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
#errors
Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<textarea>foo<span>bar</span><i>baz
#errors
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <textarea>
| "foo<span>bar</span><i>baz"
#data
<title>foo<span>bar</em><i>baz
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
Line: 1 Col: 30 Unexpected end of file. Expected end tag (title).
#document
| <html>
| <head>
| <title>
| "foo<span>bar</em><i>baz"
| <body>
#data
<!DOCTYPE html><textarea>
</textarea>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <textarea>
#data
<!DOCTYPE html><textarea>
foo</textarea>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <textarea>
| "foo"
#data
<!DOCTYPE html><textarea>
foo</textarea>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <textarea>
| "
foo"
#data
<!DOCTYPE html><html><head></head><body><ul><li><div><p><li></ul></body></html>
#errors
Line: 1 Col: 60 Missing end tag (div, li).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <ul>
| <li>
| <div>
| <p>
| <li>
#data
<!doctype html><nobr><nobr><nobr>
#errors
Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
Line: 1 Col: 33 Unexpected start tag (nobr) implies end tag (nobr).
Line: 1 Col: 33 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <nobr>
| <nobr>
| <nobr>
#data
<!doctype html><nobr><nobr></nobr><nobr>
#errors
Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <nobr>
| <nobr>
| <nobr>
#data
<!doctype html><html><body><p><table></table></body></html>
#errors
Not known
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <table>
#data
<p><table></table>
#errors
Not known
#document
| <html>
| <head>
| <body>
| <p>
| <table>

View File

@@ -1,59 +0,0 @@
#data
direct div content
#errors
#document-fragment
div
#document
| "direct div content"
#data
direct textarea content
#errors
#document-fragment
textarea
#document
| "direct textarea content"
#data
textarea content with <em>pseudo</em> <foo>markup
#errors
#document-fragment
textarea
#document
| "textarea content with <em>pseudo</em> <foo>markup"
#data
this is &#x0043;DATA inside a <style> element
#errors
#document-fragment
style
#document
| "this is &#x0043;DATA inside a <style> element"
#data
</plaintext>
#errors
#document-fragment
plaintext
#document
| "</plaintext>"
#data
setting html's innerHTML
#errors
Line: 1 Col: 24 Unexpected EOF in inner html mode.
#document-fragment
html
#document
| <head>
| <body>
| "setting html's innerHTML"
#data
<title>setting head's innerHTML</title>
#errors
#document-fragment
head
#document
| <title>
| "setting head's innerHTML"

View File

@@ -1,191 +0,0 @@
#data
<style> <!-- </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
Line: 1 Col: 22 Unexpected end of file. Expected end tag (style).
#document
| <html>
| <head>
| <style>
| " <!-- "
| <body>
| "x"
#data
<style> <!-- </style> --> </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!-- "
| " "
| <body>
| "--> x"
#data
<style> <!--> </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!--> "
| <body>
| "x"
#data
<style> <!---> </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!---> "
| <body>
| "x"
#data
<iframe> <!---> </iframe>x
#errors
Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <iframe>
| " <!---> "
| "x"
#data
<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
#errors
Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <iframe>
| " <!--- "
| "->x --> x"
#data
<script> <!-- </script> --> </script>x
#errors
Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
#document
| <html>
| <head>
| <script>
| " <!-- "
| " "
| <body>
| "--> x"
#data
<title> <!-- </title> --> </title>x
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
#document
| <html>
| <head>
| <title>
| " <!-- "
| " "
| <body>
| "--> x"
#data
<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
#errors
Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <textarea>
| " <!--- "
| "->x --> x"
#data
<style> <!</-- </style>x
#errors
Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
#document
| <html>
| <head>
| <style>
| " <!</-- "
| <body>
| "x"
#data
<p><xmp></xmp>
#errors
XXX: Unknown
#document
| <html>
| <head>
| <body>
| <p>
| <xmp>
#data
<xmp> <!-- > --> </xmp>
#errors
Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <xmp>
| " <!-- > --> "
#data
<title>&amp;</title>
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
#document
| <html>
| <head>
| <title>
| "&"
| <body>
#data
<title><!--&amp;--></title>
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
#document
| <html>
| <head>
| <title>
| "<!--&-->"
| <body>
#data
<title><!--</title>
#errors
Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
Line: 1 Col: 19 Unexpected end of file. Expected end tag (title).
#document
| <html>
| <head>
| <title>
| "<!--"
| <body>
#data
<noscript><!--</noscript>--></noscript>
#errors
Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
#document
| <html>
| <head>
| <noscript>
| "<!--"
| <body>
| "-->"

View File

@@ -1,663 +0,0 @@
#data
<!doctype html></head> <head>
#errors
Line: 1 Col: 29 Unexpected start tag head. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| " "
| <body>
#data
<!doctype html><form><div></form><div>
#errors
33: End tag "form" seen but there were unclosed elements.
38: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <form>
| <div>
| <div>
#data
<!doctype html><title>&amp;</title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "&"
| <body>
#data
<!doctype html><title><!--&amp;--></title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "<!--&-->"
| <body>
#data
<!doctype>
#errors
Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
Line: 1 Col: 10 Unexpected > character. Expected DOCTYPE name.
Line: 1 Col: 10 Erroneous DOCTYPE.
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
#data
<!---x
#errors
Line: 1 Col: 6 Unexpected end of file in comment.
Line: 1 Col: 6 Unexpected End of file. Expected DOCTYPE.
#document
| <!-- -x -->
| <html>
| <head>
| <body>
#data
<body>
<div>
#errors
Line: 1 Col: 6 Unexpected start tag (body).
Line: 2 Col: 5 Expected closing tag. Unexpected end of file.
#document-fragment
div
#document
| "
"
| <div>
#data
<frameset></frameset>
foo
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 3 Unexpected non-space characters in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
<noframes>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 10 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <frameset>
| "
"
| <noframes>
#data
<frameset></frameset>
<div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 5 Unexpected start tag (div) in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
</html>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<frameset></frameset>
</div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 2 Col: 6 Unexpected end tag (div) in the after frameset phase. Ignored.
#document
| <html>
| <head>
| <frameset>
| "
"
#data
<form><form>
#errors
Line: 1 Col: 6 Unexpected start tag (form). Expected DOCTYPE.
Line: 1 Col: 12 Unexpected start tag (form).
Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <form>
#data
<button><button>
#errors
Line: 1 Col: 8 Unexpected start tag (button). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected start tag (button) implies end tag (button).
Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <button>
| <button>
#data
<table><tr><td></th>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (th). Ignored.
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><caption><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (td). Ignored.
Line: 1 Col: 20 Unexpected table cell start tag (td) in the table body phase.
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <tbody>
| <tr>
| <td>
#data
<table><caption><div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 21 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <div>
#data
</caption><div>
#errors
Line: 1 Col: 10 Unexpected end tag (caption). Ignored.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document-fragment
caption
#document
| <div>
#data
<table><caption><div></caption>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 31 Unexpected end tag (caption). Missing end tag (div).
Line: 1 Col: 31 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <div>
#data
<table><caption></table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 24 Unexpected end table tag in caption. Generates implied end caption.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
#data
</table><div>
#errors
Line: 1 Col: 8 Unexpected end table tag in caption. Generates implied end caption.
Line: 1 Col: 8 Unexpected end tag (caption). Ignored.
Line: 1 Col: 13 Expected closing tag. Unexpected end of file.
#document-fragment
caption
#document
| <div>
#data
<table><caption></body></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 23 Unexpected end tag (body). Ignored.
Line: 1 Col: 29 Unexpected end tag (col). Ignored.
Line: 1 Col: 40 Unexpected end tag (colgroup). Ignored.
Line: 1 Col: 47 Unexpected end tag (html). Ignored.
Line: 1 Col: 55 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 60 Unexpected end tag (td). Ignored.
Line: 1 Col: 68 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 73 Unexpected end tag (th). Ignored.
Line: 1 Col: 81 Unexpected end tag (thead). Ignored.
Line: 1 Col: 86 Unexpected end tag (tr). Ignored.
Line: 1 Col: 86 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
#data
<table><caption><div></div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 27 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <caption>
| <div>
#data
<table><tr><td></body></caption></col></colgroup></html>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 22 Unexpected end tag (body). Ignored.
Line: 1 Col: 32 Unexpected end tag (caption). Ignored.
Line: 1 Col: 38 Unexpected end tag (col). Ignored.
Line: 1 Col: 49 Unexpected end tag (colgroup). Ignored.
Line: 1 Col: 56 Unexpected end tag (html). Ignored.
Line: 1 Col: 56 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
</table></tbody></tfoot></thead></tr><div>
#errors
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
Line: 1 Col: 16 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 24 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 32 Unexpected end tag (thead). Ignored.
Line: 1 Col: 37 Unexpected end tag (tr). Ignored.
Line: 1 Col: 42 Expected closing tag. Unexpected end of file.
#document-fragment
td
#document
| <div>
#data
<table><colgroup>foo
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 20 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| "foo"
| <table>
| <colgroup>
#data
foo<col>
#errors
Line: 1 Col: 3 Unexpected end tag (colgroup). Ignored.
#document-fragment
colgroup
#document
| <col>
#data
<table><colgroup></col>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 23 This element (col) has no end tag.
Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <table>
| <colgroup>
#data
<frameset><div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 1 Col: 15 Unexpected start tag token (div) in the frameset phase. Ignored.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <frameset>
#data
</frameset><frame>
#errors
Line: 1 Col: 11 Unexpected end tag token (frameset) in the frameset phase (innerHTML).
#document-fragment
frameset
#document
| <frame>
#data
<frameset></div>
#errors
Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected end tag token (div) in the frameset phase. Ignored.
Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <frameset>
#data
</body><div>
#errors
Line: 1 Col: 7 Unexpected end tag (body). Ignored.
Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
#document-fragment
body
#document
| <div>
#data
<table><tr><div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 16 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <div>
| <table>
| <tbody>
| <tr>
#data
</tr><td>
#errors
Line: 1 Col: 5 Unexpected end tag (tr). Ignored.
#document-fragment
tr
#document
| <td>
#data
</tbody></tfoot></thead><td>
#errors
Line: 1 Col: 8 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 16 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 24 Unexpected end tag (thead). Ignored.
#document-fragment
tr
#document
| <td>
#data
<table><tr><div><td>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 20 Unexpected implied end tag (div) in the table row phase.
Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| <table>
| <tbody>
| <tr>
| <td>
#data
<caption><col><colgroup><tbody><tfoot><thead><tr>
#errors
Line: 1 Col: 9 Unexpected start tag (caption).
Line: 1 Col: 14 Unexpected start tag (col).
Line: 1 Col: 24 Unexpected start tag (colgroup).
Line: 1 Col: 31 Unexpected start tag (tbody).
Line: 1 Col: 38 Unexpected start tag (tfoot).
Line: 1 Col: 45 Unexpected start tag (thead).
Line: 1 Col: 49 Unexpected end of file. Expected table content.
#document-fragment
tbody
#document
| <tr>
#data
<table><tbody></thead>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 22 Unexpected end tag (thead) in the table body phase. Ignored.
Line: 1 Col: 22 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
#data
</table><tr>
#errors
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
Line: 1 Col: 12 Unexpected end of file. Expected table content.
#document-fragment
tbody
#document
| <tr>
#data
<table><tbody></body></caption></col></colgroup></html></td></th></tr>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 21 Unexpected end tag (body) in the table body phase. Ignored.
Line: 1 Col: 31 Unexpected end tag (caption) in the table body phase. Ignored.
Line: 1 Col: 37 Unexpected end tag (col) in the table body phase. Ignored.
Line: 1 Col: 48 Unexpected end tag (colgroup) in the table body phase. Ignored.
Line: 1 Col: 55 Unexpected end tag (html) in the table body phase. Ignored.
Line: 1 Col: 60 Unexpected end tag (td) in the table body phase. Ignored.
Line: 1 Col: 65 Unexpected end tag (th) in the table body phase. Ignored.
Line: 1 Col: 70 Unexpected end tag (tr) in the table body phase. Ignored.
Line: 1 Col: 70 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
#data
<table><tbody></div>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 20 Unexpected end tag (div) in table context caused voodoo mode.
Line: 1 Col: 20 End tag (div) seen too early. Expected other end tag.
Line: 1 Col: 20 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
#data
<table><table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 14 Unexpected start tag (table) implies end tag (table).
Line: 1 Col: 14 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
| <table>
#data
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 14 Unexpected end tag (body). Ignored.
Line: 1 Col: 24 Unexpected end tag (caption). Ignored.
Line: 1 Col: 30 Unexpected end tag (col). Ignored.
Line: 1 Col: 41 Unexpected end tag (colgroup). Ignored.
Line: 1 Col: 48 Unexpected end tag (html). Ignored.
Line: 1 Col: 56 Unexpected end tag (tbody). Ignored.
Line: 1 Col: 61 Unexpected end tag (td). Ignored.
Line: 1 Col: 69 Unexpected end tag (tfoot). Ignored.
Line: 1 Col: 74 Unexpected end tag (th). Ignored.
Line: 1 Col: 82 Unexpected end tag (thead). Ignored.
Line: 1 Col: 87 Unexpected end tag (tr). Ignored.
Line: 1 Col: 87 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <table>
#data
</table><tr>
#errors
Line: 1 Col: 8 Unexpected end tag (table). Ignored.
Line: 1 Col: 12 Unexpected end of file. Expected table content.
#document-fragment
table
#document
| <tbody>
| <tr>
#data
<body></body></html>
#errors
Line: 1 Col: 20 Unexpected html end tag in inner html mode.
Line: 1 Col: 20 Unexpected EOF in inner html mode.
#document-fragment
html
#document
| <head>
| <body>
#data
<html><frameset></frameset></html>
#errors
Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
#document
| <html>
| <head>
| <frameset>
| " "
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
#errors
Line: 1 Col: 50 Erroneous DOCTYPE.
Line: 1 Col: 63 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
| <html>
| <head>
| <body>
#data
<param><frameset></frameset>
#errors
Line: 1 Col: 7 Unexpected start tag (param). Expected DOCTYPE.
Line: 1 Col: 17 Unexpected start tag (frameset).
#document
| <html>
| <head>
| <frameset>
#data
<source><frameset></frameset>
#errors
Line: 1 Col: 7 Unexpected start tag (source). Expected DOCTYPE.
Line: 1 Col: 17 Unexpected start tag (frameset).
#document
| <html>
| <head>
| <frameset>
#data
<track><frameset></frameset>
#errors
Line: 1 Col: 7 Unexpected start tag (track). Expected DOCTYPE.
Line: 1 Col: 17 Unexpected start tag (frameset).
#document
| <html>
| <head>
| <frameset>
#data
</html><frameset></frameset>
#errors
7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
17: Stray “frameset” start tag.
17: “frameset” start tag seen.
#document
| <html>
| <head>
| <frameset>
#data
</body><frameset></frameset>
#errors
7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
17: Stray “frameset” start tag.
17: “frameset” start tag seen.
#document
| <html>
| <head>
| <frameset>

View File

@@ -1,390 +0,0 @@
#data
<!doctype html><body><title>X</title>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
#data
<!doctype html><table><title>X</title></table>
#errors
Line: 1 Col: 29 Unexpected start tag (title) in table context caused voodoo mode.
Line: 1 Col: 38 Unexpected end tag (title) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <title>
| "X"
| <table>
#data
<!doctype html><head></head><title>X</title>
#errors
Line: 1 Col: 35 Unexpected start tag (title) that can be in head. Moved.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "X"
| <body>
#data
<!doctype html></head><title>X</title>
#errors
Line: 1 Col: 29 Unexpected start tag (title) that can be in head. Moved.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <title>
| "X"
| <body>
#data
<!doctype html><table><meta></table>
#errors
Line: 1 Col: 28 Unexpected start tag (meta) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <meta>
| <table>
#data
<!doctype html><table>X<tr><td><table> <meta></table></table>
#errors
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 45 Unexpected start tag (meta) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <table>
| <tbody>
| <tr>
| <td>
| <meta>
| <table>
| " "
#data
<!doctype html><html> <head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!doctype html> <head>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!doctype html><table><style> <tr>x </style> </table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <style>
| " <tr>x "
| " "
#data
<!doctype html><table><TBODY><script> <tr>x </script> </table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <script>
| " <tr>x "
| " "
#data
<!doctype html><p><applet><p>X</p></applet>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <applet>
| <p>
| "X"
#data
<!doctype html><listing>
X</listing>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <listing>
| "X"
#data
<!doctype html><select><input>X
#errors
Line: 1 Col: 30 Unexpected input start tag in the select phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <input>
| "X"
#data
<!doctype html><select><select>X
#errors
Line: 1 Col: 31 Unexpected select start tag in the select phase treated as select end tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "X"
#data
<!doctype html><table><input type=hidDEN></table>
#errors
Line: 1 Col: 41 Unexpected input with type hidden in table context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <input>
| type="hidDEN"
#data
<!doctype html><table>X<input type=hidDEN></table>
#errors
Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "X"
| <table>
| <input>
| type="hidDEN"
#data
<!doctype html><table> <input type=hidDEN></table>
#errors
Line: 1 Col: 43 Unexpected input with type hidden in table context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| " "
| <input>
| type="hidDEN"
#data
<!doctype html><table> <input type='hidDEN'></table>
#errors
Line: 1 Col: 45 Unexpected input with type hidden in table context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| " "
| <input>
| type="hidDEN"
#data
<!doctype html><table><input type=" hidden"><input type=hidDEN></table>
#errors
Line: 1 Col: 44 Unexpected start tag (input) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <input>
| type=" hidden"
| <table>
| <input>
| type="hidDEN"
#data
<!doctype html><table><select>X<tr>
#errors
Line: 1 Col: 30 Unexpected start tag (select) in table context caused voodoo mode.
Line: 1 Col: 35 Unexpected table element start tag (trs) in the select in table phase.
Line: 1 Col: 35 Unexpected end of file. Expected table content.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "X"
| <table>
| <tbody>
| <tr>
#data
<!doctype html><select>X</select>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "X"
#data
<!DOCTYPE hTmL><html></html>
#errors
Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML><html></html>
#errors
Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
<body>X</body></body>
#errors
Line: 1 Col: 21 Unexpected end tag token (body) in the after body phase.
Line: 1 Col: 21 Unexpected EOF in inner html mode.
#document-fragment
html
#document
| <head>
| <body>
| "X"
#data
<div><p>a</x> b
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 1 Col: 13 Unexpected end tag (x). Ignored.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| <p>
| "a b"
#data
<table><tr><td><code></code> </table>
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <code>
| " "
#data
<table><b><tr><td>aaa</td></tr>bbb</table>ccc
#errors
XXX: Fix me
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| "bbb"
| <table>
| <tbody>
| <tr>
| <td>
| "aaa"
| <b>
| "ccc"
#data
A<table><tr> B</tr> B</table>
#errors
XXX: Fix me
#document
| <html>
| <head>
| <body>
| "A B B"
| <table>
| <tbody>
| <tr>
#data
A<table><tr> B</tr> </em>C</table>
#errors
XXX: Fix me
#document
| <html>
| <head>
| <body>
| "A BC"
| <table>
| <tbody>
| <tr>
| " "
#data
<select><keygen>
#errors
Not known
#document
| <html>
| <head>
| <body>
| <select>
| <keygen>

View File

@@ -1,148 +0,0 @@
#data
<div>
<div></div>
</span>x
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 3 Col: 7 Unexpected end tag (span). Ignored.
Line: 3 Col: 8 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "
"
| <div>
| "
x"
#data
<div>x<div></div>
</span>x
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 2 Col: 7 Unexpected end tag (span). Ignored.
Line: 2 Col: 8 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "
x"
#data
<div>x<div></div>x</span>x
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 1 Col: 25 Unexpected end tag (span). Ignored.
Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "xx"
#data
<div>x<div></div>y</span>z
#errors
Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
Line: 1 Col: 25 Unexpected end tag (span). Ignored.
Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "yz"
#data
<table><div>x<div></div>x</span>x
#errors
Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
Line: 1 Col: 12 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 18 Unexpected start tag (div) in table context caused voodoo mode.
Line: 1 Col: 24 Unexpected end tag (div) in table context caused voodoo mode.
Line: 1 Col: 32 Unexpected end tag (span) in table context caused voodoo mode.
Line: 1 Col: 32 Unexpected end tag (span). Ignored.
Line: 1 Col: 33 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| <div>
| "x"
| <div>
| "xx"
| <table>
#data
x<table>x
#errors
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
Line: 1 Col: 9 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 9 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| "xx"
| <table>
#data
x<table><table>x
#errors
Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
Line: 1 Col: 15 Unexpected start tag (table) implies end tag (table).
Line: 1 Col: 16 Unexpected non-space characters in table context caused voodoo mode.
Line: 1 Col: 16 Unexpected end of file. Expected table content.
#document
| <html>
| <head>
| <body>
| "x"
| <table>
| "x"
| <table>
#data
<b>a<div></div><div></b>y
#errors
Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
Line: 1 Col: 24 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <b>
| "a"
| <div>
| <div>
| <b>
| "y"
#data
<a><div><p></a>
#errors
Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
#document
| <html>
| <head>
| <body>
| <a>
| <div>
| <a>
| <p>
| <a>

View File

@@ -1,457 +0,0 @@
#data
<!DOCTYPE html><math></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
#data
<!DOCTYPE html><body><math></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
#data
<!DOCTYPE html><math><mi>
#errors
25: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
#data
<!DOCTYPE html><math><annotation-xml><svg><u>
#errors
45: HTML start tag “u” in a foreign namespace context.
45: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <u>
#data
<!DOCTYPE html><body><select><math></math></select>
#errors
Line: 1 Col: 35 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 42 Unexpected end tag (math) in the select phase. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><body><select><option><math></math></option></select>
#errors
Line: 1 Col: 43 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 50 Unexpected end tag (math) in the select phase. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
#data
<!DOCTYPE html><body><table><math></math></table>
#errors
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 41 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <table>
#data
<!DOCTYPE html><body><table><math><mi>foo</mi></math></table>
#errors
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 53 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <table>
#data
<!DOCTYPE html><body><table><math><mi>foo</mi><mi>bar</mi></math></table>
#errors
Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 58 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 65 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <table>
#data
<!DOCTYPE html><body><table><tbody><math><mi>foo</mi><mi>bar</mi></math></tbody></table>
#errors
Line: 1 Col: 41 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 53 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 65 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 72 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <table>
| <tbody>
#data
<!DOCTYPE html><body><table><tbody><tr><math><mi>foo</mi><mi>bar</mi></math></tr></tbody></table>
#errors
Line: 1 Col: 45 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 57 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 69 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 76 Unexpected end tag (math) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math></td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
#data
<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math><p>baz</td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi></math><p>baz</caption></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 70 HTML start tag "p" in a foreign namespace context.
Line: 1 Col: 81 Unexpected end table tag in caption. Generates implied end caption.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table><p>quux
#errors
Line: 1 Col: 78 Unexpected end table tag in caption. Generates implied end caption.
Line: 1 Col: 78 Unexpected end tag (caption). Missing end tag (math).
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><colgroup><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 44 Unexpected start tag (math) in table context caused voodoo mode.
Line: 1 Col: 56 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 68 Unexpected end tag (mi) in table context caused voodoo mode.
Line: 1 Col: 71 HTML start tag "p" in a foreign namespace context.
Line: 1 Col: 71 Unexpected start tag (p) in table context caused voodoo mode.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
| <table>
| <colgroup>
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 50 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 54 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 62 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 66 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 74 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 77 Unexpected start tag token (p) in the select phase. Ignored.
Line: 1 Col: 88 Unexpected table element end tag (tables) in the select in table phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| "foobarbaz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi><p>baz</table><p>quux
#errors
Line: 1 Col: 36 Unexpected start tag (select) in table context caused voodoo mode.
Line: 1 Col: 42 Unexpected start tag token (math) in the select phase. Ignored.
Line: 1 Col: 46 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 54 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 58 Unexpected start tag token (mi) in the select phase. Ignored.
Line: 1 Col: 66 Unexpected end tag (mi) in the select phase. Ignored.
Line: 1 Col: 69 Unexpected start tag token (p) in the select phase. Ignored.
Line: 1 Col: 80 Unexpected table element end tag (tables) in the select in table phase.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "foobarbaz"
| <table>
| <p>
| "quux"
#data
<!DOCTYPE html><body></body></html><math><mi>foo</mi><mi>bar</mi><p>baz
#errors
Line: 1 Col: 41 Unexpected start tag (math).
Line: 1 Col: 68 HTML start tag "p" in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body></body><math><mi>foo</mi><mi>bar</mi><p>baz
#errors
Line: 1 Col: 34 Unexpected start tag token (math) in the after body phase.
Line: 1 Col: 61 HTML start tag "p" in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| "foo"
| <math mi>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><frameset><math><mi></mi><mi></mi><p><span>
#errors
Line: 1 Col: 31 Unexpected start tag token (math) in the frameset phase. Ignored.
Line: 1 Col: 35 Unexpected start tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 40 Unexpected end tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 44 Unexpected start tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 49 Unexpected end tag token (mi) in the frameset phase. Ignored.
Line: 1 Col: 52 Unexpected start tag token (p) in the frameset phase. Ignored.
Line: 1 Col: 58 Unexpected start tag token (span) in the frameset phase. Ignored.
Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset></frameset><math><mi></mi><mi></mi><p><span>
#errors
Line: 1 Col: 42 Unexpected start tag (math) in the after frameset phase. Ignored.
Line: 1 Col: 46 Unexpected start tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 51 Unexpected end tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 55 Unexpected start tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 60 Unexpected end tag (mi) in the after frameset phase. Ignored.
Line: 1 Col: 63 Unexpected start tag (p) in the after frameset phase. Ignored.
Line: 1 Col: 69 Unexpected start tag (span) in the after frameset phase. Ignored.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| <math math>
| xlink href="foo"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <math math>
| <math mi>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <math math>
| <math mi>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <math math>
| <math mi>
| xlink href="foo"
| xml lang="en"
| "bar"

View File

@@ -1,741 +0,0 @@
#data
<body><span>
#errors
#document-fragment
body
#document
| <span>
#data
<span><body>
#errors
#document-fragment
body
#document
| <span>
#data
<span><body>
#errors
#document-fragment
div
#document
| <span>
#data
<body><span>
#errors
#document-fragment
html
#document
| <head>
| <body>
| <span>
#data
<frameset><span>
#errors
#document-fragment
body
#document
| <span>
#data
<span><frameset>
#errors
#document-fragment
body
#document
| <span>
#data
<span><frameset>
#errors
#document-fragment
div
#document
| <span>
#data
<frameset><span>
#errors
#document-fragment
html
#document
| <head>
| <frameset>
#data
<table><tr>
#errors
#document-fragment
table
#document
| <tbody>
| <tr>
#data
</table><tr>
#errors
#document-fragment
table
#document
| <tbody>
| <tr>
#data
<a>
#errors
#document-fragment
table
#document
| <a>
#data
<a>
#errors
#document-fragment
table
#document
| <a>
#data
<a><caption>a
#errors
#document-fragment
table
#document
| <a>
| <caption>
| "a"
#data
<a><colgroup><col>
#errors
#document-fragment
table
#document
| <a>
| <colgroup>
| <col>
#data
<a><tbody><tr>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
#data
<a><tfoot><tr>
#errors
#document-fragment
table
#document
| <a>
| <tfoot>
| <tr>
#data
<a><thead><tr>
#errors
#document-fragment
table
#document
| <a>
| <thead>
| <tr>
#data
<a><tr>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
#data
<a><th>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
| <th>
#data
<a><td>
#errors
#document-fragment
table
#document
| <a>
| <tbody>
| <tr>
| <td>
#data
<table></table><tbody>
#errors
#document-fragment
caption
#document
| <table>
#data
</table><span>
#errors
#document-fragment
caption
#document
| <span>
#data
<span></table>
#errors
#document-fragment
caption
#document
| <span>
#data
</caption><span>
#errors
#document-fragment
caption
#document
| <span>
#data
<span></caption><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><caption><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><col><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><colgroup><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><html><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><tbody><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><td><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><tfoot><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><thead><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><th><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span><tr><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
<span></table><span>
#errors
#document-fragment
caption
#document
| <span>
| <span>
#data
</colgroup><col>
#errors
#document-fragment
colgroup
#document
| <col>
#data
<a><col>
#errors
#document-fragment
colgroup
#document
| <col>
#data
<caption><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<col><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<colgroup><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<tbody><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<tfoot><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<thead><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
</table><a>
#errors
#document-fragment
tbody
#document
| <a>
#data
<a><tr>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
#data
<a><td>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
| <td>
#data
<a><td>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
| <td>
#data
<a><td>
#errors
#document-fragment
tbody
#document
| <a>
| <tr>
| <td>
#data
<td><table><tbody><a><tr>
#errors
#document-fragment
tbody
#document
| <tr>
| <td>
| <a>
| <table>
| <tbody>
| <tr>
#data
</tr><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<td><table><a><tr></tr><tr>
#errors
#document-fragment
tr
#document
| <td>
| <a>
| <table>
| <tbody>
| <tr>
| <tr>
#data
<caption><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<col><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<colgroup><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<tbody><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<tfoot><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<thead><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<tr><td>
#errors
#document-fragment
tr
#document
| <td>
#data
</table><td>
#errors
#document-fragment
tr
#document
| <td>
#data
<td><table></table><td>
#errors
#document-fragment
tr
#document
| <td>
| <table>
| <td>
#data
<td><table></table><td>
#errors
#document-fragment
tr
#document
| <td>
| <table>
| <td>
#data
<caption><a>
#errors
#document-fragment
td
#document
| <a>
#data
<col><a>
#errors
#document-fragment
td
#document
| <a>
#data
<colgroup><a>
#errors
#document-fragment
td
#document
| <a>
#data
<tbody><a>
#errors
#document-fragment
td
#document
| <a>
#data
<tfoot><a>
#errors
#document-fragment
td
#document
| <a>
#data
<th><a>
#errors
#document-fragment
td
#document
| <a>
#data
<thead><a>
#errors
#document-fragment
td
#document
| <a>
#data
<tr><a>
#errors
#document-fragment
td
#document
| <a>
#data
</table><a>
#errors
#document-fragment
td
#document
| <a>
#data
</tbody><a>
#errors
#document-fragment
td
#document
| <a>
#data
</td><a>
#errors
#document-fragment
td
#document
| <a>
#data
</tfoot><a>
#errors
#document-fragment
td
#document
| <a>
#data
</thead><a>
#errors
#document-fragment
td
#document
| <a>
#data
</th><a>
#errors
#document-fragment
td
#document
| <a>
#data
</tr><a>
#errors
#document-fragment
td
#document
| <a>
#data
<table><td><td>
#errors
#document-fragment
td
#document
| <table>
| <tbody>
| <tr>
| <td>
| <td>
#data
</select><option>
#errors
#document-fragment
select
#document
| <option>
#data
<input><option>
#errors
#document-fragment
select
#document
| <option>
#data
<keygen><option>
#errors
#document-fragment
select
#document
| <option>
#data
<textarea><option>
#errors
#document-fragment
select
#document
| <option>
#data
</html><!--abc-->
#errors
#document-fragment
html
#document
| <head>
| <body>
| <!-- abc -->
#data
</frameset><frame>
#errors
#document-fragment
frameset
#document
| <frame>
#data
#errors
#document-fragment
html
#document
| <head>
| <body>

View File

@@ -1,261 +0,0 @@
#data
<b><p>Bold </b> Not bold</p>
Also not bold.
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <p>
| <b>
| "Bold "
| " Not bold"
| "
Also not bold."
#data
<html>
<font color=red><i>Italic and Red<p>Italic and Red </font> Just italic.</p> Italic only.</i> Plain
<p>I should not be red. <font color=red>Red. <i>Italic and red.</p>
<p>Italic and red. </i> Red.</font> I should not be red.</p>
<b>Bold <i>Bold and italic</b> Only Italic </i> Plain
#errors
#document
| <html>
| <head>
| <body>
| <font>
| color="red"
| <i>
| "Italic and Red"
| <i>
| <p>
| <font>
| color="red"
| "Italic and Red "
| " Just italic."
| " Italic only."
| " Plain
"
| <p>
| "I should not be red. "
| <font>
| color="red"
| "Red. "
| <i>
| "Italic and red."
| <font>
| color="red"
| <i>
| "
"
| <p>
| <font>
| color="red"
| <i>
| "Italic and red. "
| " Red."
| " I should not be red."
| "
"
| <b>
| "Bold "
| <i>
| "Bold and italic"
| <i>
| " Only Italic "
| " Plain"
#data
<html><body>
<p><font size="7">First paragraph.</p>
<p>Second paragraph.</p></font>
<b><p><i>Bold and Italic</b> Italic</p>
#errors
#document
| <html>
| <head>
| <body>
| "
"
| <p>
| <font>
| size="7"
| "First paragraph."
| <font>
| size="7"
| "
"
| <p>
| "Second paragraph."
| "
"
| <b>
| <p>
| <b>
| <i>
| "Bold and Italic"
| <i>
| " Italic"
#data
<html>
<dl>
<dt><b>Boo
<dd>Goo?
</dl>
</html>
#errors
#document
| <html>
| <head>
| <body>
| <dl>
| "
"
| <dt>
| <b>
| "Boo
"
| <dd>
| <b>
| "Goo?
"
| <b>
| "
"
#data
<html><body>
<label><a><div>Hello<div>World</div></a></label>
</body></html>
#errors
#document
| <html>
| <head>
| <body>
| "
"
| <label>
| <a>
| <div>
| <a>
| "Hello"
| <div>
| "World"
| "
"
#data
<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>
#errors
#document
| <html>
| <head>
| <body>
| <center>
| " "
| <font>
| "a"
| <font>
| <img>
| " "
| <table>
| " "
| <tbody>
| <tr>
| <td>
| " "
| " "
| " "
#data
<table><tr><p><a><p>You should see this text.
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <a>
| <p>
| <a>
| "You should see this text."
| <table>
| <tbody>
| <tr>
#data
<TABLE>
<TR>
<CENTER><CENTER><TD></TD></TR><TR>
<FONT>
<TABLE><tr></tr></TABLE>
</P>
<a></font><font></a>
This page contains an insanely badly-nested tag sequence.
#errors
#document
| <html>
| <head>
| <body>
| <center>
| <center>
| <font>
| "
"
| <table>
| "
"
| <tbody>
| <tr>
| "
"
| <td>
| <tr>
| "
"
| <table>
| <tbody>
| <tr>
| <font>
| "
"
| <p>
| "
"
| <a>
| <a>
| <font>
| <font>
| "
This page contains an insanely badly-nested tag sequence."
#data
<html>
<body>
<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the
nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre>
</body>
</html>
#errors
#document
| <html>
| <head>
| <body>
| "
"
| <b>
| <nobr>
| <div>
| <b>
| <nobr>
| "This text is in a div inside a nobr"
| "More text that should not be in the nobr, i.e., the
nobr should have closed the div inside it implicitly. "
| <pre>
| "A pre tag outside everything else."
| "
"

View File

@@ -1,594 +0,0 @@
#data
Test
#errors
Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
#document
| <html>
| <head>
| <body>
| "Test"
#data
<div></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
#data
<div>Test</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "Test"
#data
<di
#errors
#document
| <html>
| <head>
| <body>
#data
<div>Hello</div>
<script>
console.log("PASS");
</script>
<div>Bye</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "Hello"
| "
"
| <script>
| "
console.log("PASS");
"
| "
"
| <div>
| "Bye"
#data
<div foo="bar">Hello</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo="bar"
| "Hello"
#data
<div>Hello</div>
<script>
console.log("FOO<span>BAR</span>BAZ");
</script>
<div>Bye</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "Hello"
| "
"
| <script>
| "
console.log("FOO<span>BAR</span>BAZ");
"
| "
"
| <div>
| "Bye"
#data
<foo bar="baz"></foo><potato quack="duck"></potato>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| bar="baz"
| <potato>
| quack="duck"
#data
<foo bar="baz"><potato quack="duck"></potato></foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| bar="baz"
| <potato>
| quack="duck"
#data
<foo></foo bar="baz"><potato></potato quack="duck">
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| <potato>
#data
</ tttt>
#errors
#document
| <!-- tttt -->
| <html>
| <head>
| <body>
#data
<div FOO ><img><img></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo=""
| <img>
| <img>
#data
<p>Test</p<p>Test2</p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| "TestTest2"
#data
<rdar://problem/6869687>
#errors
#document
| <html>
| <head>
| <body>
| <rdar:>
| 6869687=""
| problem=""
#data
<A>test< /A>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "test< /A>"
#data
&lt;
#errors
#document
| <html>
| <head>
| <body>
| "<"
#data
<body foo='bar'><body foo='baz' yo='mama'>
#errors
#document
| <html>
| <head>
| <body>
| foo="bar"
| yo="mama"
#data
<body></br foo="bar"></body>
#errors
#document
| <html>
| <head>
| <body>
| <br>
#data
<bdy><br foo="bar"></body>
#errors
#document
| <html>
| <head>
| <body>
| <bdy>
| <br>
| foo="bar"
#data
<body></body></br foo="bar">
#errors
#document
| <html>
| <head>
| <body>
| <br>
#data
<bdy></body><br foo="bar">
#errors
#document
| <html>
| <head>
| <body>
| <bdy>
| <br>
| foo="bar"
#data
<html><body></body></html><!-- Hi there -->
#errors
#document
| <html>
| <head>
| <body>
| <!-- Hi there -->
#data
<html><body></body></html>x<!-- Hi there -->
#errors
#document
| <html>
| <head>
| <body>
| "x"
| <!-- Hi there -->
#data
<html><body></body></html>x<!-- Hi there --></html><!-- Again -->
#errors
#document
| <html>
| <head>
| <body>
| "x"
| <!-- Hi there -->
| <!-- Again -->
#data
<html><body></body></html>x<!-- Hi there --></body></html><!-- Again -->
#errors
#document
| <html>
| <head>
| <body>
| "x"
| <!-- Hi there -->
| <!-- Again -->
#data
<html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
#errors
#document
| <html>
| <head>
| <body>
| <ruby>
| <div>
| <rp>
| "xx"
#data
<html><body><ruby><div><rt>xx</rt></div></ruby></body></html>
#errors
#document
| <html>
| <head>
| <body>
| <ruby>
| <div>
| <rt>
| "xx"
#data
<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6-->
#errors
#document
| <html>
| <head>
| <frameset>
| <!-- 1 -->
| <noframes>
| "A"
| <!-- 2 -->
| <!-- 3 -->
| <noframes>
| "B"
| <!-- 4 -->
| <noframes>
| "C"
| <!-- 5 -->
| <!-- 6 -->
#data
<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "A"
| <option>
| "B"
| <select>
| <option>
| "C"
| <option>
| "D"
| <select>
| <option>
| "E"
| <option>
| "F"
| <select>
| <option>
| "G"
#data
<dd><dd><dt><dt><dd><li><li>
#errors
#document
| <html>
| <head>
| <body>
| <dd>
| <dd>
| <dt>
| <dt>
| <dd>
| <li>
| <li>
#data
<div><b></div><div><nobr>a<nobr>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <b>
| <div>
| <b>
| <nobr>
| "a"
| <nobr>
#data
<head></head>
<body></body>
#errors
#document
| <html>
| <head>
| "
"
| <body>
#data
<head></head> <style></style>ddd
#errors
#document
| <html>
| <head>
| <style>
| " "
| <body>
| "ddd"
#data
<kbd><table></kbd><col><select><tr>
#errors
#document
| <html>
| <head>
| <body>
| <kbd>
| <select>
| <table>
| <colgroup>
| <col>
| <tbody>
| <tr>
#data
<kbd><table></kbd><col><select><tr></table><div>
#errors
#document
| <html>
| <head>
| <body>
| <kbd>
| <select>
| <table>
| <colgroup>
| <col>
| <tbody>
| <tr>
| <div>
#data
<a><li><style></style><title></title></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <li>
| <a>
| <style>
| <title>
#data
<font></p><p><meta><title></title></font>
#errors
#document
| <html>
| <head>
| <body>
| <font>
| <p>
| <p>
| <font>
| <meta>
| <title>
#data
<a><center><title></title><a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <center>
| <a>
| <title>
| <a>
#data
<svg><title><div>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| <div>
#data
<svg><title><rect><div>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| <rect>
| <div>
#data
<svg><title><svg><div>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg title>
| <svg svg>
| <div>
#data
<img <="" FAIL>
#errors
#document
| <html>
| <head>
| <body>
| <img>
| <=""
| fail=""
#data
<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul>
#errors
#document
| <html>
| <head>
| <body>
| <ul>
| <li>
| <div>
| id="foo"
| "A"
| <li>
| "B"
| <div>
| "C"
#data
<svg><em><desc></em>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <em>
| <desc>
#data
<svg><tfoot></mi><td>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg tfoot>
| <svg td>
#data
<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mrow>
| <math mrow>
| <math mn>
| "1"
| <math mi>
| "a"
#data
<!doctype html><input type="hidden"><frameset>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!doctype html><input type="button"><frameset>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <input>
| type="button"

View File

@@ -1,94 +0,0 @@
#data
<foo bar=qux/>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| bar="qux/"
#data
<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| id="status"
| <noscript>
| "<strong>A</strong>"
| <span>
| "B"
#data
<div><sarcasm><div></div></sarcasm></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <sarcasm>
| <div>
#data
<html><body><img src="" border="0" alt="><div>A</div></body></html>
#errors
#document
| <html>
| <head>
| <body>
#data
<table><td></tbody>A
#errors
#document
| <html>
| <head>
| <body>
| "A"
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><td></thead>A
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
#data
<table><td></tfoot>A
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
#data
<table><thead><td></tbody>A
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <thead>
| <tr>
| <td>
| "A"

View File

@@ -1,64 +0,0 @@
import sys
import os
import json
import re
import html5lib
import support
import test_parser
import test_tokenizer
p = html5lib.HTMLParser()
unnamespaceExpected = re.compile(r"^(\|\s*)<html ([^>]+)>", re.M).sub
def main(out_path):
if not os.path.exists(out_path):
sys.stderr.write("Path %s does not exist"%out_path)
sys.exit(1)
for filename in support.html5lib_test_files('tokenizer', '*.test'):
run_file(filename, out_path)
def run_file(filename, out_path):
try:
tests_data = json.load(file(filename))
except ValueError:
sys.stderr.write("Failed to load %s\n"%filename)
return
name = os.path.splitext(os.path.split(filename)[1])[0]
output_file = open(os.path.join(out_path, "tokenizer_%s.dat"%name), "w")
if 'tests' in tests_data:
for test_data in tests_data['tests']:
if 'initialStates' not in test_data:
test_data["initialStates"] = ["Data state"]
for initial_state in test_data["initialStates"]:
if initial_state != "Data state":
#don't support this yet
continue
test = make_test(test_data)
output_file.write(test)
output_file.close()
def make_test(test_data):
if 'doubleEscaped' in test_data:
test_data = test_tokenizer.unescape_test(test_data)
rv = []
rv.append("#data")
rv.append(test_data["input"].encode("utf8"))
rv.append("#errors")
tree = p.parse(test_data["input"])
output = p.tree.testSerializer(tree)
output = "\n".join(("| "+ line[3:]) if line.startswith("| ") else line
for line in output.split("\n"))
output = unnamespaceExpected(r"\1<\2>", output)
rv.append(output.encode("utf8"))
rv.append("")
return "\n".join(rv)
if __name__ == "__main__":
main(sys.argv[1])

View File

@@ -1,96 +0,0 @@
"""A collection of modules for building different kinds of tree from
HTML documents.
To create a treebuilder for a new type of tree, you need to do
implement several things:
1) A set of classes for various types of elements: Document, Doctype,
Comment, Element. These must implement the interface of
_base.treebuilders.Node (although comment nodes have a different
signature for their constructor, see treebuilders.simpletree.Comment)
Textual content may also be implemented as another node type, or not, as
your tree implementation requires.
2) A treebuilder object (called TreeBuilder by convention) that
inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
documentClass - the class to use for the bottommost node of a document
elementClass - the class to use for HTML Elements
commentClass - the class to use for comments
doctypeClass - the class to use for doctypes
It also has one required method:
getDocument - Returns the root node of the complete document tree
3) If you wish to run the unit tests, you must also create a
testSerializer method on your treebuilder which accepts a node and
returns a string containing Node and its children serialized according
to the format used in the unittests
The supplied simpletree module provides a python-only implementation
of a full treebuilder and is a useful reference for the semantics of
the various methods.
"""
treeBuilderCache = {}
import sys
def getTreeBuilder(treeType, implementation=None, **kwargs):
"""Get a TreeBuilder class for various types of tree with built-in support
treeType - the name of the tree type required (case-insensitive). Supported
values are "simpletree", "dom", "etree" and "beautifulsoup"
"simpletree" - a built-in DOM-ish tree type with support for some
more pythonic idioms.
"dom" - A generic builder for DOM implementations, defaulting to
a xml.dom.minidom based implementation for the sake of
backwards compatibility (as releases up until 0.10 had a
builder called "dom" that was a minidom implemenation).
"etree" - A generic builder for tree implementations exposing an
elementtree-like interface (known to work with
ElementTree, cElementTree and lxml.etree).
"beautifulsoup" - Beautiful soup (if installed)
implementation - (Currently applies to the "etree" and "dom" tree types). A
module implementing the tree type e.g.
xml.etree.ElementTree or lxml.etree."""
treeType = treeType.lower()
if treeType not in treeBuilderCache:
if treeType == "dom":
import dom
# XXX: Keep backwards compatibility by using minidom if no implementation is given
if implementation == None:
from xml.dom import minidom
implementation = minidom
# XXX: NEVER cache here, caching is done in the dom submodule
return dom.getDomModule(implementation, **kwargs).TreeBuilder
elif treeType == "simpletree":
import simpletree
treeBuilderCache[treeType] = simpletree.TreeBuilder
elif treeType == "beautifulsoup":
import soup
treeBuilderCache[treeType] = soup.TreeBuilder
elif treeType == "lxml":
import etree_lxml
treeBuilderCache[treeType] = etree_lxml.TreeBuilder
elif treeType == "etree":
# Come up with a sane default
if implementation == None:
try:
import xml.etree.cElementTree as ET
except ImportError:
try:
import xml.etree.ElementTree as ET
except ImportError:
try:
import cElementTree as ET
except ImportError:
import elementtree.ElementTree as ET
implementation = ET
import etree
# NEVER cache here, caching is done in the etree submodule
return etree.getETreeModule(implementation, **kwargs).TreeBuilder
else:
raise ValueError("""Unrecognised treebuilder "%s" """%treeType)
return treeBuilderCache.get(treeType)

View File

@@ -1,256 +0,0 @@
import _base
from html5lib.constants import voidElements, namespaces, prefixes
from xml.sax.saxutils import escape
# Really crappy basic implementation of a DOM-core like thing
class Node(_base.Node):
type = -1
def __init__(self, name):
self.name = name
self.parent = None
self.value = None
self.childNodes = []
self._flags = []
def __iter__(self):
for node in self.childNodes:
yield node
for item in node:
yield item
def __unicode__(self):
return self.name
def toxml(self):
raise NotImplementedError
def printTree(self, indent=0):
tree = '\n|%s%s' % (' '* indent, unicode(self))
for child in self.childNodes:
tree += child.printTree(indent + 2)
return tree
def appendChild(self, node):
assert isinstance(node, Node)
if (isinstance(node, TextNode) and self.childNodes and
isinstance(self.childNodes[-1], TextNode)):
self.childNodes[-1].value += node.value
else:
self.childNodes.append(node)
node.parent = self
def insertText(self, data, insertBefore=None):
assert isinstance(data, unicode), "data %s is of type %s expected unicode"%(repr(data), type(data))
if insertBefore is None:
self.appendChild(TextNode(data))
else:
self.insertBefore(TextNode(data), insertBefore)
def insertBefore(self, node, refNode):
index = self.childNodes.index(refNode)
if (isinstance(node, TextNode) and index > 0 and
isinstance(self.childNodes[index - 1], TextNode)):
self.childNodes[index - 1].value += node.value
else:
self.childNodes.insert(index, node)
node.parent = self
def removeChild(self, node):
try:
self.childNodes.remove(node)
except:
# XXX
raise
node.parent = None
def cloneNode(self):
raise NotImplementedError
def hasContent(self):
"""Return true if the node has children or text"""
return bool(self.childNodes)
def getNameTuple(self):
if self.namespace == None:
return namespaces["html"], self.name
else:
return self.namespace, self.name
nameTuple = property(getNameTuple)
class Document(Node):
type = 1
def __init__(self):
Node.__init__(self, None)
def __str__(self):
return "#document"
def __unicode__(self):
return str(self)
def appendChild(self, child):
Node.appendChild(self, child)
def toxml(self, encoding="utf=8"):
result = ""
for child in self.childNodes:
result += child.toxml()
return result.encode(encoding)
def hilite(self, encoding="utf-8"):
result = "<pre>"
for child in self.childNodes:
result += child.hilite()
return result.encode(encoding) + "</pre>"
def printTree(self):
tree = unicode(self)
for child in self.childNodes:
tree += child.printTree(2)
return tree
def cloneNode(self):
return Document()
class DocumentFragment(Document):
type = 2
def __str__(self):
return "#document-fragment"
def __unicode__(self):
return str(self)
def cloneNode(self):
return DocumentFragment()
class DocumentType(Node):
type = 3
def __init__(self, name, publicId, systemId):
Node.__init__(self, name)
self.publicId = publicId
self.systemId = systemId
def __unicode__(self):
if self.publicId or self.systemId:
publicId = self.publicId or ""
systemId = self.systemId or ""
return """<!DOCTYPE %s "%s" "%s">"""%(
self.name, publicId, systemId)
else:
return u"<!DOCTYPE %s>" % self.name
toxml = __unicode__
def hilite(self):
return '<code class="markup doctype">&lt;!DOCTYPE %s></code>' % self.name
def cloneNode(self):
return DocumentType(self.name, self.publicId, self.systemId)
class TextNode(Node):
type = 4
def __init__(self, value):
Node.__init__(self, None)
self.value = value
def __unicode__(self):
return u"\"%s\"" % self.value
def toxml(self):
return escape(self.value)
hilite = toxml
def cloneNode(self):
return TextNode(self.value)
class Element(Node):
type = 5
def __init__(self, name, namespace=None):
Node.__init__(self, name)
self.namespace = namespace
self.attributes = {}
def __unicode__(self):
if self.namespace == None:
return u"<%s>" % self.name
else:
return u"<%s %s>"%(prefixes[self.namespace], self.name)
def toxml(self):
result = '<' + self.name
if self.attributes:
for name,value in self.attributes.iteritems():
result += u' %s="%s"' % (name, escape(value,{'"':'&quot;'}))
if self.childNodes:
result += '>'
for child in self.childNodes:
result += child.toxml()
result += u'</%s>' % self.name
else:
result += u'/>'
return result
def hilite(self):
result = '&lt;<code class="markup element-name">%s</code>' % self.name
if self.attributes:
for name, value in self.attributes.iteritems():
result += ' <code class="markup attribute-name">%s</code>=<code class="markup attribute-value">"%s"</code>' % (name, escape(value, {'"':'&quot;'}))
if self.childNodes:
result += ">"
for child in self.childNodes:
result += child.hilite()
elif self.name in voidElements:
return result + ">"
return result + '&lt;/<code class="markup element-name">%s</code>>' % self.name
def printTree(self, indent):
tree = '\n|%s%s' % (' '*indent, unicode(self))
indent += 2
if self.attributes:
for name, value in sorted(self.attributes.iteritems()):
if isinstance(name, tuple):
name = "%s %s"%(name[0], name[1])
tree += '\n|%s%s="%s"' % (' ' * indent, name, value)
for child in self.childNodes:
tree += child.printTree(indent)
return tree
def cloneNode(self):
newNode = Element(self.name)
if hasattr(self, 'namespace'):
newNode.namespace = self.namespace
for attr, value in self.attributes.iteritems():
newNode.attributes[attr] = value
return newNode
class CommentNode(Node):
type = 6
def __init__(self, data):
Node.__init__(self, None)
self.data = data
def __unicode__(self):
return "<!-- %s -->" % self.data
def toxml(self):
return "<!--%s-->" % self.data
def hilite(self):
return '<code class="markup comment">&lt;!--%s--></code>' % escape(self.data)
def cloneNode(self):
return CommentNode(self.data)
class TreeBuilder(_base.TreeBuilder):
documentClass = Document
doctypeClass = DocumentType
elementClass = Element
commentClass = CommentNode
fragmentClass = DocumentFragment
def testSerializer(self, node):
return node.printTree()

View File

@@ -1,236 +0,0 @@
import warnings
warnings.warn("BeautifulSoup 3.x (as of 3.1) is not fully compatible with html5lib and support will be removed in the future", DeprecationWarning)
from BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment, Declaration
import _base
from html5lib.constants import namespaces, DataLossWarning
class AttrList(object):
def __init__(self, element):
self.element = element
self.attrs = dict(self.element.attrs)
def __iter__(self):
return self.attrs.items().__iter__()
def __setitem__(self, name, value):
"set attr", name, value
self.element[name] = value
def items(self):
return self.attrs.items()
def keys(self):
return self.attrs.keys()
def __getitem__(self, name):
return self.attrs[name]
def __contains__(self, name):
return name in self.attrs.keys()
def __eq__(self, other):
if len(self.keys()) != len(other.keys()):
return False
for item in self.keys():
if item not in other:
return False
if self[item] != other[item]:
return False
return True
class Element(_base.Node):
def __init__(self, element, soup, namespace):
_base.Node.__init__(self, element.name)
self.element = element
self.soup = soup
self.namespace = namespace
def _nodeIndex(self, node, refNode):
# Finds a node by identity rather than equality
for index in range(len(self.element.contents)):
if id(self.element.contents[index]) == id(refNode.element):
return index
return None
def appendChild(self, node):
if (node.element.__class__ == NavigableString and self.element.contents
and self.element.contents[-1].__class__ == NavigableString):
# Concatenate new text onto old text node
# (TODO: This has O(n^2) performance, for input like "a</a>a</a>a</a>...")
newStr = NavigableString(self.element.contents[-1]+node.element)
# Remove the old text node
# (Can't simply use .extract() by itself, because it fails if
# an equal text node exists within the parent node)
oldElement = self.element.contents[-1]
del self.element.contents[-1]
oldElement.parent = None
oldElement.extract()
self.element.insert(len(self.element.contents), newStr)
else:
self.element.insert(len(self.element.contents), node.element)
node.parent = self
def getAttributes(self):
return AttrList(self.element)
def setAttributes(self, attributes):
if attributes:
for name, value in attributes.items():
self.element[name] = value
attributes = property(getAttributes, setAttributes)
def insertText(self, data, insertBefore=None):
text = TextNode(NavigableString(data), self.soup)
if insertBefore:
self.insertBefore(text, insertBefore)
else:
self.appendChild(text)
def insertBefore(self, node, refNode):
index = self._nodeIndex(node, refNode)
if (node.element.__class__ == NavigableString and self.element.contents
and self.element.contents[index-1].__class__ == NavigableString):
# (See comments in appendChild)
newStr = NavigableString(self.element.contents[index-1]+node.element)
oldNode = self.element.contents[index-1]
del self.element.contents[index-1]
oldNode.parent = None
oldNode.extract()
self.element.insert(index-1, newStr)
else:
self.element.insert(index, node.element)
node.parent = self
def removeChild(self, node):
index = self._nodeIndex(node.parent, node)
del node.parent.element.contents[index]
node.element.parent = None
node.element.extract()
node.parent = None
def reparentChildren(self, newParent):
while self.element.contents:
child = self.element.contents[0]
child.extract()
if isinstance(child, Tag):
newParent.appendChild(Element(child, self.soup, namespaces["html"]))
else:
newParent.appendChild(TextNode(child, self.soup))
def cloneNode(self):
node = Element(Tag(self.soup, self.element.name), self.soup, self.namespace)
for key,value in self.attributes:
node.attributes[key] = value
return node
def hasContent(self):
return self.element.contents
def getNameTuple(self):
if self.namespace == None:
return namespaces["html"], self.name
else:
return self.namespace, self.name
nameTuple = property(getNameTuple)
class TextNode(Element):
def __init__(self, element, soup):
_base.Node.__init__(self, None)
self.element = element
self.soup = soup
def cloneNode(self):
raise NotImplementedError
class TreeBuilder(_base.TreeBuilder):
def __init__(self, namespaceHTMLElements):
if namespaceHTMLElements:
warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
_base.TreeBuilder.__init__(self, namespaceHTMLElements)
def documentClass(self):
self.soup = BeautifulSoup("")
return Element(self.soup, self.soup, None)
def insertDoctype(self, token):
name = token["name"]
publicId = token["publicId"]
systemId = token["systemId"]
if publicId:
self.soup.insert(0, Declaration("DOCTYPE %s PUBLIC \"%s\" \"%s\""%(name, publicId, systemId or "")))
elif systemId:
self.soup.insert(0, Declaration("DOCTYPE %s SYSTEM \"%s\""%
(name, systemId)))
else:
self.soup.insert(0, Declaration("DOCTYPE %s"%name))
def elementClass(self, name, namespace):
if namespace is not None:
warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
return Element(Tag(self.soup, name), self.soup, namespace)
def commentClass(self, data):
return TextNode(Comment(data), self.soup)
def fragmentClass(self):
self.soup = BeautifulSoup("")
self.soup.name = "[document_fragment]"
return Element(self.soup, self.soup, None)
def appendChild(self, node):
self.soup.insert(len(self.soup.contents), node.element)
def testSerializer(self, element):
return testSerializer(element)
def getDocument(self):
return self.soup
def getFragment(self):
return _base.TreeBuilder.getFragment(self).element
def testSerializer(element):
import re
rv = []
def serializeElement(element, indent=0):
if isinstance(element, Declaration):
doctype_regexp = r'DOCTYPE\s+(?P<name>[^\s]*)( PUBLIC "(?P<publicId>.*)" "(?P<systemId1>.*)"| SYSTEM "(?P<systemId2>.*)")?'
m = re.compile(doctype_regexp).match(element.string)
assert m is not None, "DOCTYPE did not match expected format"
name = m.group('name')
publicId = m.group('publicId')
if publicId is not None:
systemId = m.group('systemId1') or ""
else:
systemId = m.group('systemId2')
if publicId is not None or systemId is not None:
rv.append("""|%s<!DOCTYPE %s "%s" "%s">"""%
(' '*indent, name, publicId or "", systemId or ""))
else:
rv.append("|%s<!DOCTYPE %s>"%(' '*indent, name))
elif isinstance(element, BeautifulSoup):
if element.name == "[document_fragment]":
rv.append("#document-fragment")
else:
rv.append("#document")
elif isinstance(element, Comment):
rv.append("|%s<!-- %s -->"%(' '*indent, element.string))
elif isinstance(element, unicode):
rv.append("|%s\"%s\"" %(' '*indent, element))
else:
rv.append("|%s<%s>"%(' '*indent, element.name))
if element.attrs:
for name, value in sorted(element.attrs):
rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
indent += 2
if hasattr(element, "contents"):
for child in element.contents:
serializeElement(child, indent)
serializeElement(element, 0)
return "\n".join(rv)

Some files were not shown because too many files have changed in this diff Show More