From b8c57827655a9e679c70ba6283f38de801b2cb37 Mon Sep 17 00:00:00 2001 From: Ade Date: Thu, 6 Sep 2012 10:47:07 +1200 Subject: [PATCH] Include html5lib for BeautifulSoup BeautifulSoup needs lxml or html5, have included html5lib. Also latest BeautifulSoup 4.1.3 --- bs4/__init__.py | 14 +- bs4/builder/__init__.py | 11 +- bs4/builder/_lxml.py | 30 +- bs4/dammit.py | 39 +- bs4/element.py | 22 +- bs4/testing.py | 22 + bs4/tests/__init__.py | 1 - bs4/tests/test_builder_registry.py | 141 - bs4/tests/test_docs.py | 36 - bs4/tests/test_html5lib.py | 58 - bs4/tests/test_htmlparser.py | 19 - bs4/tests/test_lxml.py | 75 - bs4/tests/test_soup.py | 368 - bs4/tests/test_tree.py | 1695 - data/interfaces/default/base.html | 2 +- data/interfaces/default/config.html | 2 +- headphones/versioncheck.py | 2 +- html5lib/__init__.py | 17 + html5lib/constants.py | 3085 ++ html5lib/filters/__init__.py | 0 html5lib/filters/_base.py | 10 + html5lib/filters/formfiller.py | 127 + html5lib/filters/inject_meta_charset.py | 62 + html5lib/filters/lint.py | 88 + html5lib/filters/optionaltags.py | 202 + html5lib/filters/sanitizer.py | 8 + html5lib/filters/whitespace.py | 41 + html5lib/html5parser.py | 2733 + html5lib/ihatexml.py | 177 + html5lib/inputstream.py | 782 + html5lib/sanitizer.py | 258 + html5lib/serializer/__init__.py | 17 + html5lib/serializer/htmlserializer.py | 312 + html5lib/serializer/xhtmlserializer.py | 9 + html5lib/tests/__init__.py | 12 + html5lib/tests/mockParser.py | 37 + html5lib/tests/runparsertests.py | 27 + html5lib/tests/runtests.py | 20 + html5lib/tests/support.py | 127 + html5lib/tests/test_encoding.py | 54 + html5lib/tests/test_formfiller.py | 296 + html5lib/tests/test_parser.py | 140 + html5lib/tests/test_parser2.py | 39 + html5lib/tests/test_sanitizer.py | 76 + html5lib/tests/test_serializer.py | 180 + html5lib/tests/test_stream.py | 97 + html5lib/tests/test_tokenizer.py | 193 + html5lib/tests/test_treewalkers.py | 311 + html5lib/tests/test_whitespace_filter.py | 123 + .../tests/testdata/encoding/test-yahoo-jp.dat | 10 + html5lib/tests/testdata/encoding/tests1.dat | 394 + html5lib/tests/testdata/encoding/tests2.dat | 115 + html5lib/tests/testdata/sanitizer/tests1.dat | 501 + html5lib/tests/testdata/serializer/core.test | 125 + .../tests/testdata/serializer/injectmeta.test | 66 + .../testdata/serializer/optionaltags.test | 965 + .../tests/testdata/serializer/options.test | 60 + .../tests/testdata/serializer/whitespace.test | 51 + .../tests/testdata/sniffer/htmlOrFeed.json | 43 + .../testdata/tokenizer/contentModelFlags.test | 75 + html5lib/tests/testdata/tokenizer/domjs.test | 90 + .../tests/testdata/tokenizer/entities.test | 283 + .../tests/testdata/tokenizer/escapeFlag.test | 33 + .../testdata/tokenizer/namedEntities.test | 44189 ++++++++++++++++ .../testdata/tokenizer/numericEntities.test | 1313 + .../tokenizer/pendingSpecChanges.test | 7 + html5lib/tests/testdata/tokenizer/test1.test | 196 + html5lib/tests/testdata/tokenizer/test2.test | 179 + html5lib/tests/testdata/tokenizer/test3.test | 6047 +++ html5lib/tests/testdata/tokenizer/test4.test | 344 + .../testdata/tokenizer/unicodeChars.test | 1295 + .../tokenizer/unicodeCharsProblematic.test | 27 + .../testdata/tokenizer/xmlViolation.test | 22 + .../testdata/tree-construction/adoption01.dat | 194 + .../testdata/tree-construction/adoption02.dat | 31 + .../testdata/tree-construction/comments01.dat | 135 + .../testdata/tree-construction/doctype01.dat | 370 + .../tree-construction/domjs-unsafe.dat | Bin 0 -> 6639 bytes .../testdata/tree-construction/entities01.dat | 603 + .../testdata/tree-construction/entities02.dat | 249 + .../tree-construction/html5test-com.dat | 246 + .../testdata/tree-construction/inbody01.dat | 43 + .../testdata/tree-construction/isindex.dat | 40 + ...pending-spec-changes-plain-text-unsafe.dat | Bin 0 -> 115 bytes .../pending-spec-changes.dat | 52 + .../tree-construction/plain-text-unsafe.dat | Bin 0 -> 4166 bytes .../tree-construction/scriptdata01.dat | 308 + .../testdata/tree-construction/tables01.dat | 212 + .../testdata/tree-construction/tests1.dat | 1952 + .../testdata/tree-construction/tests10.dat | 799 + .../testdata/tree-construction/tests11.dat | 482 + .../testdata/tree-construction/tests12.dat | 62 + .../testdata/tree-construction/tests14.dat | 74 + .../testdata/tree-construction/tests15.dat | 208 + .../testdata/tree-construction/tests16.dat | 2299 + .../testdata/tree-construction/tests17.dat | 153 + .../testdata/tree-construction/tests18.dat | 269 + .../testdata/tree-construction/tests19.dat | 1237 + .../testdata/tree-construction/tests2.dat | 763 + .../testdata/tree-construction/tests20.dat | 455 + .../testdata/tree-construction/tests21.dat | 221 + .../testdata/tree-construction/tests22.dat | 157 + .../testdata/tree-construction/tests23.dat | 155 + .../testdata/tree-construction/tests24.dat | 79 + .../testdata/tree-construction/tests25.dat | 219 + .../testdata/tree-construction/tests26.dat | 313 + .../testdata/tree-construction/tests3.dat | 305 + .../testdata/tree-construction/tests4.dat | 59 + .../testdata/tree-construction/tests5.dat | 191 + .../testdata/tree-construction/tests6.dat | 663 + .../testdata/tree-construction/tests7.dat | 390 + .../testdata/tree-construction/tests8.dat | 148 + .../testdata/tree-construction/tests9.dat | 457 + .../tree-construction/tests_innerHTML_1.dat | 741 + .../testdata/tree-construction/tricky01.dat | 261 + .../testdata/tree-construction/webkit01.dat | 594 + .../testdata/tree-construction/webkit02.dat | 94 + html5lib/tests/tokenizertotree.py | 64 + html5lib/tokenizer.py | 1744 + html5lib/treebuilders/__init__.py | 96 + html5lib/treebuilders/_base.py | 377 + html5lib/treebuilders/dom.py | 291 + html5lib/treebuilders/etree.py | 344 + html5lib/treebuilders/etree_lxml.py | 336 + html5lib/treebuilders/simpletree.py | 256 + html5lib/treebuilders/soup.py | 236 + html5lib/treewalkers/__init__.py | 52 + html5lib/treewalkers/_base.py | 176 + html5lib/treewalkers/dom.py | 41 + html5lib/treewalkers/etree.py | 141 + html5lib/treewalkers/genshistream.py | 70 + html5lib/treewalkers/lxmletree.py | 186 + html5lib/treewalkers/pulldom.py | 60 + html5lib/treewalkers/simpletree.py | 78 + html5lib/treewalkers/soup.py | 60 + html5lib/utils.py | 175 + 136 files changed, 87265 insertions(+), 2428 deletions(-) delete mode 100644 bs4/tests/__init__.py delete mode 100644 bs4/tests/test_builder_registry.py delete mode 100644 bs4/tests/test_docs.py delete mode 100644 bs4/tests/test_html5lib.py delete mode 100644 bs4/tests/test_htmlparser.py delete mode 100644 bs4/tests/test_lxml.py delete mode 100644 bs4/tests/test_soup.py delete mode 100644 bs4/tests/test_tree.py create mode 100644 html5lib/__init__.py create mode 100644 html5lib/constants.py create mode 100644 html5lib/filters/__init__.py create mode 100644 html5lib/filters/_base.py create mode 100644 html5lib/filters/formfiller.py create mode 100644 html5lib/filters/inject_meta_charset.py create mode 100644 html5lib/filters/lint.py create mode 100644 html5lib/filters/optionaltags.py create mode 100644 html5lib/filters/sanitizer.py create mode 100644 html5lib/filters/whitespace.py create mode 100644 html5lib/html5parser.py create mode 100644 html5lib/ihatexml.py create mode 100644 html5lib/inputstream.py create mode 100644 html5lib/sanitizer.py create mode 100644 html5lib/serializer/__init__.py create mode 100644 html5lib/serializer/htmlserializer.py create mode 100644 html5lib/serializer/xhtmlserializer.py create mode 100644 html5lib/tests/__init__.py create mode 100644 html5lib/tests/mockParser.py create mode 100644 html5lib/tests/runparsertests.py create mode 100644 html5lib/tests/runtests.py create mode 100644 html5lib/tests/support.py create mode 100644 html5lib/tests/test_encoding.py create mode 100644 html5lib/tests/test_formfiller.py create mode 100644 html5lib/tests/test_parser.py create mode 100755 html5lib/tests/test_parser2.py create mode 100644 html5lib/tests/test_sanitizer.py create mode 100644 html5lib/tests/test_serializer.py create mode 100755 html5lib/tests/test_stream.py create mode 100644 html5lib/tests/test_tokenizer.py create mode 100644 html5lib/tests/test_treewalkers.py create mode 100644 html5lib/tests/test_whitespace_filter.py create mode 100644 html5lib/tests/testdata/encoding/test-yahoo-jp.dat create mode 100644 html5lib/tests/testdata/encoding/tests1.dat create mode 100644 html5lib/tests/testdata/encoding/tests2.dat create mode 100644 html5lib/tests/testdata/sanitizer/tests1.dat create mode 100644 html5lib/tests/testdata/serializer/core.test create mode 100644 html5lib/tests/testdata/serializer/injectmeta.test create mode 100644 html5lib/tests/testdata/serializer/optionaltags.test create mode 100644 html5lib/tests/testdata/serializer/options.test create mode 100644 html5lib/tests/testdata/serializer/whitespace.test create mode 100644 html5lib/tests/testdata/sniffer/htmlOrFeed.json create mode 100644 html5lib/tests/testdata/tokenizer/contentModelFlags.test create mode 100644 html5lib/tests/testdata/tokenizer/domjs.test create mode 100644 html5lib/tests/testdata/tokenizer/entities.test create mode 100644 html5lib/tests/testdata/tokenizer/escapeFlag.test create mode 100644 html5lib/tests/testdata/tokenizer/namedEntities.test create mode 100644 html5lib/tests/testdata/tokenizer/numericEntities.test create mode 100644 html5lib/tests/testdata/tokenizer/pendingSpecChanges.test create mode 100644 html5lib/tests/testdata/tokenizer/test1.test create mode 100644 html5lib/tests/testdata/tokenizer/test2.test create mode 100644 html5lib/tests/testdata/tokenizer/test3.test create mode 100644 html5lib/tests/testdata/tokenizer/test4.test create mode 100644 html5lib/tests/testdata/tokenizer/unicodeChars.test create mode 100644 html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test create mode 100644 html5lib/tests/testdata/tokenizer/xmlViolation.test create mode 100644 html5lib/tests/testdata/tree-construction/adoption01.dat create mode 100644 html5lib/tests/testdata/tree-construction/adoption02.dat create mode 100644 html5lib/tests/testdata/tree-construction/comments01.dat create mode 100644 html5lib/tests/testdata/tree-construction/doctype01.dat create mode 100644 html5lib/tests/testdata/tree-construction/domjs-unsafe.dat create mode 100644 html5lib/tests/testdata/tree-construction/entities01.dat create mode 100644 html5lib/tests/testdata/tree-construction/entities02.dat create mode 100644 html5lib/tests/testdata/tree-construction/html5test-com.dat create mode 100644 html5lib/tests/testdata/tree-construction/inbody01.dat create mode 100644 html5lib/tests/testdata/tree-construction/isindex.dat create mode 100644 html5lib/tests/testdata/tree-construction/pending-spec-changes-plain-text-unsafe.dat create mode 100644 html5lib/tests/testdata/tree-construction/pending-spec-changes.dat create mode 100644 html5lib/tests/testdata/tree-construction/plain-text-unsafe.dat create mode 100644 html5lib/tests/testdata/tree-construction/scriptdata01.dat create mode 100644 html5lib/tests/testdata/tree-construction/tables01.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests1.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests10.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests11.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests12.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests14.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests15.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests16.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests17.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests18.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests19.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests2.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests20.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests21.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests22.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests23.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests24.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests25.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests26.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests3.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests4.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests5.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests6.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests7.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests8.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests9.dat create mode 100644 html5lib/tests/testdata/tree-construction/tests_innerHTML_1.dat create mode 100644 html5lib/tests/testdata/tree-construction/tricky01.dat create mode 100644 html5lib/tests/testdata/tree-construction/webkit01.dat create mode 100644 html5lib/tests/testdata/tree-construction/webkit02.dat create mode 100644 html5lib/tests/tokenizertotree.py create mode 100644 html5lib/tokenizer.py create mode 100755 html5lib/treebuilders/__init__.py create mode 100755 html5lib/treebuilders/_base.py create mode 100644 html5lib/treebuilders/dom.py create mode 100755 html5lib/treebuilders/etree.py create mode 100644 html5lib/treebuilders/etree_lxml.py create mode 100755 html5lib/treebuilders/simpletree.py create mode 100644 html5lib/treebuilders/soup.py create mode 100644 html5lib/treewalkers/__init__.py create mode 100644 html5lib/treewalkers/_base.py create mode 100644 html5lib/treewalkers/dom.py create mode 100644 html5lib/treewalkers/etree.py create mode 100644 html5lib/treewalkers/genshistream.py create mode 100644 html5lib/treewalkers/lxmletree.py create mode 100644 html5lib/treewalkers/pulldom.py create mode 100644 html5lib/treewalkers/simpletree.py create mode 100644 html5lib/treewalkers/soup.py create mode 100644 html5lib/utils.py diff --git a/bs4/__init__.py b/bs4/__init__.py index af8c718d..80f6f684 100644 --- a/bs4/__init__.py +++ b/bs4/__init__.py @@ -17,7 +17,7 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.1.0" +__version__ = "4.1.3" __copyright__ = "Copyright (c) 2004-2012 Leonard Richardson" __license__ = "MIT" @@ -149,7 +149,7 @@ class BeautifulSoup(Tag): features = self.DEFAULT_BUILDER_FEATURES builder_class = builder_registry.lookup(*features) if builder_class is None: - raise ValueError( + raise FeatureNotFound( "Couldn't find a tree builder with the features you " "requested: %s. Do you need to install a parser library?" % ",".join(features)) @@ -208,10 +208,10 @@ class BeautifulSoup(Tag): return navigable def insert_before(self, successor): - raise ValueError("BeautifulSoup objects don't support insert_before().") + raise NotImplementedError("BeautifulSoup objects don't support insert_before().") def insert_after(self, successor): - raise ValueError("BeautifulSoup objects don't support insert_after().") + raise NotImplementedError("BeautifulSoup objects don't support insert_after().") def popTag(self): tag = self.tagStack.pop() @@ -267,7 +267,7 @@ class BeautifulSoup(Tag): for i in range(len(self.tagStack) - 1, 0, -1): if (name == self.tagStack[i].name - and nsprefix == self.tagStack[i].nsprefix == nsprefix): + and nsprefix == self.tagStack[i].prefix): numPops = len(self.tagStack) - i break if not inclusivePop: @@ -348,6 +348,10 @@ class StopParsing(Exception): pass +class FeatureNotFound(ValueError): + pass + + #By default, act as an HTML pretty-printer. if __name__ == '__main__': import sys diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py index 4c22b864..dc7deb93 100644 --- a/bs4/builder/__init__.py +++ b/bs4/builder/__init__.py @@ -157,7 +157,16 @@ class TreeBuilder(object): # value is a whitespace-separated list of CSS # classes. Split it into a list. value = attrs[cdata_list_attr] - values = whitespace_re.split(value) + if isinstance(value, basestring): + values = whitespace_re.split(value) + else: + # html5lib sometimes calls setAttributes twice + # for the same tag when rearranging the parse + # tree. On the second call the attribute value + # here is already a list. If this happens, + # leave the value alone rather than trying to + # split it again. + values = value attrs[cdata_list_attr] = values return attrs diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py index c78fdff6..f6b91ff5 100644 --- a/bs4/builder/_lxml.py +++ b/bs4/builder/_lxml.py @@ -111,14 +111,34 @@ class LXMLTreeBuilderForXML(TreeBuilder): attribute = NamespacedAttribute( "xmlns", prefix, "http://www.w3.org/2000/xmlns/") attrs[attribute] = namespace + + if self.nsmaps is not None and len(self.nsmaps) > 0: + # Namespaces are in play. Find any attributes that came in + # from lxml with namespaces attached to their names, and + # turn then into NamespacedAttribute objects. + new_attrs = {} + for attr, value in attrs.items(): + namespace, attr = self._getNsTag(attr) + if namespace is None: + new_attrs[attr] = value + else: + nsprefix = self._prefix_for_namespace(namespace) + attr = NamespacedAttribute(nsprefix, attr, namespace) + new_attrs[attr] = value + attrs = new_attrs + namespace, name = self._getNsTag(name) - if namespace is not None: - for inverted_nsmap in reversed(self.nsmaps): - if inverted_nsmap is not None and namespace in inverted_nsmap: - nsprefix = inverted_nsmap[namespace] - break + nsprefix = self._prefix_for_namespace(namespace) self.soup.handle_starttag(name, namespace, nsprefix, attrs) + def _prefix_for_namespace(self, namespace): + """Find the currently active prefix for the given namespace.""" + if namespace is None: + return None + for inverted_nsmap in reversed(self.nsmaps): + if inverted_nsmap is not None and namespace in inverted_nsmap: + return inverted_nsmap[namespace] + def end(self, name): self.soup.endData() completed_tag = self.soup.tagStack[-1] diff --git a/bs4/dammit.py b/bs4/dammit.py index 58cad9ba..983ade0f 100644 --- a/bs4/dammit.py +++ b/bs4/dammit.py @@ -10,18 +10,30 @@ encoding; that's the tree builder's job. import codecs from htmlentitydefs import codepoint2name import re -import warnings +import logging -# Autodetects character encodings. Very useful. -# Download from http://chardet.feedparser.org/ -# or 'apt-get install python-chardet' -# or 'easy_install chardet' +# Import a library to autodetect character encodings. +chardet_type = None try: - import chardet - #import chardet.constants - #chardet.constants._debug = 1 + # First try the fast C implementation. + # PyPI package: cchardet + import cchardet + def chardet_dammit(s): + return cchardet.detect(s)['encoding'] except ImportError: - chardet = None + try: + # Fall back to the pure Python implementation + # Debian package: python-chardet + # PyPI package: chardet + import chardet + def chardet_dammit(s): + return chardet.detect(s)['encoding'] + #import chardet.constants + #chardet.constants._debug = 1 + except ImportError: + # No chardet available. + def chardet_dammit(s): + return None # Available from http://cjkpython.i18n.org/. try: @@ -207,8 +219,8 @@ class UnicodeDammit: break # If no luck and we have auto-detection library, try that: - if not u and chardet and not isinstance(self.markup, unicode): - u = self._convert_from(chardet.detect(self.markup)['encoding']) + if not u and not isinstance(self.markup, unicode): + u = self._convert_from(chardet_dammit(self.markup)) # As a last resort, try utf-8 and windows-1252: if not u: @@ -226,10 +238,9 @@ class UnicodeDammit: if proposed_encoding != "ascii": u = self._convert_from(proposed_encoding, "replace") if u is not None: - warnings.warn( - UnicodeWarning( + logging.warning( "Some characters could not be decoded, and were " - "replaced with REPLACEMENT CHARACTER.")) + "replaced with REPLACEMENT CHARACTER.") self.contains_replacement_characters = True break diff --git a/bs4/element.py b/bs4/element.py index 91a40078..26422fda 100644 --- a/bs4/element.py +++ b/bs4/element.py @@ -820,7 +820,7 @@ class Tag(PageElement): for string in self._all_strings(True): yield string - def get_text(self, separator="", strip=False): + def get_text(self, separator=u"", strip=False): """ Get all child strings, concatenated using the given separator. """ @@ -987,7 +987,7 @@ class Tag(PageElement): if isinstance(val, list) or isinstance(val, tuple): val = ' '.join(val) elif not isinstance(val, basestring): - val = str(val) + val = unicode(val) elif ( isinstance(val, AttributeValueWithCharsetSubstitution) and eventual_encoding is not None): @@ -995,20 +995,21 @@ class Tag(PageElement): text = self.format_string(val, formatter) decoded = ( - str(key) + '=' + unicode(key) + '=' + EntitySubstitution.quoted_attribute_value(text)) attrs.append(decoded) close = '' closeTag = '' - if self.is_empty_element: - close = '/' - else: - closeTag = '' % self.name prefix = '' if self.prefix: prefix = self.prefix + ":" + if self.is_empty_element: + close = '/' + else: + closeTag = '' % (prefix, self.name) + pretty_print = (indent_level is not None) if pretty_print: space = (' ' * (indent_level - 1)) @@ -1120,6 +1121,7 @@ class Tag(PageElement): callable that takes a string and returns whether or not the string matches for some custom definition of 'matches'. The same is true of the tag name.""" + generator = self.descendants if not recursive: generator = self.children @@ -1168,6 +1170,12 @@ class SoupStrainer(object): kwargs['class'] = attrs attrs = None + if 'class_' in kwargs: + # Treat class_="foo" as a search for the 'class' + # attribute, overriding any non-dict value for attrs. + kwargs['class'] = kwargs['class_'] + del kwargs['class_'] + if kwargs: if attrs: attrs = attrs.copy() diff --git a/bs4/testing.py b/bs4/testing.py index 5a84b0ba..30e74f42 100644 --- a/bs4/testing.py +++ b/bs4/testing.py @@ -202,6 +202,14 @@ class HTMLTreeBuilderSmokeTest(object): "Bar" "Baz") + def test_deeply_nested_multivalued_attribute(self): + # html5lib can set the attributes of the same tag many times + # as it rearranges the tree. This has caused problems with + # multivalued attributes. + markup = '
' + soup = self.soup(markup) + self.assertEqual(["css"], soup.div.div['class']) + def test_angle_brackets_in_attribute_values_are_escaped(self): self.assertSoupEquals('', '') @@ -445,6 +453,11 @@ class XMLTreeBuilderSmokeTest(object): self.assertEqual( soup.encode("utf-8"), markup) + def test_popping_namespaced_tag(self): + markup = 'b2012-07-02T20:33:42Zcd' + soup = self.soup(markup) + self.assertEqual( + unicode(soup.rss), markup) def test_docstring_includes_correct_encoding(self): soup = self.soup("") @@ -472,6 +485,15 @@ class XMLTreeBuilderSmokeTest(object): self.assertEqual("http://example.com/", root['xmlns:a']) self.assertEqual("http://example.net/", root['xmlns:b']) + def test_closing_namespaced_tag(self): + markup = '

20010504

' + soup = self.soup(markup) + self.assertEqual(unicode(soup.p), markup) + + def test_namespaced_attributes(self): + markup = '' + soup = self.soup(markup) + self.assertEqual(unicode(soup.foo), markup) class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): """Smoke test for a tree builder that supports HTML5.""" diff --git a/bs4/tests/__init__.py b/bs4/tests/__init__.py deleted file mode 100644 index 142c8cc3..00000000 --- a/bs4/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"The beautifulsoup tests." diff --git a/bs4/tests/test_builder_registry.py b/bs4/tests/test_builder_registry.py deleted file mode 100644 index 92ad10fb..00000000 --- a/bs4/tests/test_builder_registry.py +++ /dev/null @@ -1,141 +0,0 @@ -"""Tests of the builder registry.""" - -import unittest - -from bs4 import BeautifulSoup -from bs4.builder import ( - builder_registry as registry, - HTMLParserTreeBuilder, - TreeBuilderRegistry, -) - -try: - from bs4.builder import HTML5TreeBuilder - HTML5LIB_PRESENT = True -except ImportError: - HTML5LIB_PRESENT = False - -try: - from bs4.builder import ( - LXMLTreeBuilderForXML, - LXMLTreeBuilder, - ) - LXML_PRESENT = True -except ImportError: - LXML_PRESENT = False - - -class BuiltInRegistryTest(unittest.TestCase): - """Test the built-in registry with the default builders registered.""" - - def test_combination(self): - if LXML_PRESENT: - self.assertEqual(registry.lookup('fast', 'html'), - LXMLTreeBuilder) - - if LXML_PRESENT: - self.assertEqual(registry.lookup('permissive', 'xml'), - LXMLTreeBuilderForXML) - self.assertEqual(registry.lookup('strict', 'html'), - HTMLParserTreeBuilder) - if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html5lib', 'html'), - HTML5TreeBuilder) - - def test_lookup_by_markup_type(self): - if LXML_PRESENT: - self.assertEqual(registry.lookup('html'), LXMLTreeBuilder) - self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML) - else: - self.assertEqual(registry.lookup('xml'), None) - if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html'), HTML5TreeBuilder) - else: - self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder) - - def test_named_library(self): - if LXML_PRESENT: - self.assertEqual(registry.lookup('lxml', 'xml'), - LXMLTreeBuilderForXML) - self.assertEqual(registry.lookup('lxml', 'html'), - LXMLTreeBuilder) - if HTML5LIB_PRESENT: - self.assertEqual(registry.lookup('html5lib'), - HTML5TreeBuilder) - - self.assertEqual(registry.lookup('html.parser'), - HTMLParserTreeBuilder) - - def test_beautifulsoup_constructor_does_lookup(self): - # You can pass in a string. - BeautifulSoup("", features="html") - # Or a list of strings. - BeautifulSoup("", features=["html", "fast"]) - - # You'll get an exception if BS can't find an appropriate - # builder. - self.assertRaises(ValueError, BeautifulSoup, - "", features="no-such-feature") - -class RegistryTest(unittest.TestCase): - """Test the TreeBuilderRegistry class in general.""" - - def setUp(self): - self.registry = TreeBuilderRegistry() - - def builder_for_features(self, *feature_list): - cls = type('Builder_' + '_'.join(feature_list), - (object,), {'features' : feature_list}) - - self.registry.register(cls) - return cls - - def test_register_with_no_features(self): - builder = self.builder_for_features() - - # Since the builder advertises no features, you can't find it - # by looking up features. - self.assertEqual(self.registry.lookup('foo'), None) - - # But you can find it by doing a lookup with no features, if - # this happens to be the only registered builder. - self.assertEqual(self.registry.lookup(), builder) - - def test_register_with_features_makes_lookup_succeed(self): - builder = self.builder_for_features('foo', 'bar') - self.assertEqual(self.registry.lookup('foo'), builder) - self.assertEqual(self.registry.lookup('bar'), builder) - - def test_lookup_fails_when_no_builder_implements_feature(self): - builder = self.builder_for_features('foo', 'bar') - self.assertEqual(self.registry.lookup('baz'), None) - - def test_lookup_gets_most_recent_registration_when_no_feature_specified(self): - builder1 = self.builder_for_features('foo') - builder2 = self.builder_for_features('bar') - self.assertEqual(self.registry.lookup(), builder2) - - def test_lookup_fails_when_no_tree_builders_registered(self): - self.assertEqual(self.registry.lookup(), None) - - def test_lookup_gets_most_recent_builder_supporting_all_features(self): - has_one = self.builder_for_features('foo') - has_the_other = self.builder_for_features('bar') - has_both_early = self.builder_for_features('foo', 'bar', 'baz') - has_both_late = self.builder_for_features('foo', 'bar', 'quux') - lacks_one = self.builder_for_features('bar') - has_the_other = self.builder_for_features('foo') - - # There are two builders featuring 'foo' and 'bar', but - # the one that also features 'quux' was registered later. - self.assertEqual(self.registry.lookup('foo', 'bar'), - has_both_late) - - # There is only one builder featuring 'foo', 'bar', and 'baz'. - self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'), - has_both_early) - - def test_lookup_fails_when_cannot_reconcile_requested_features(self): - builder1 = self.builder_for_features('foo', 'bar') - builder2 = self.builder_for_features('foo', 'baz') - self.assertEqual(self.registry.lookup('bar', 'baz'), None) diff --git a/bs4/tests/test_docs.py b/bs4/tests/test_docs.py deleted file mode 100644 index 5b9f6770..00000000 --- a/bs4/tests/test_docs.py +++ /dev/null @@ -1,36 +0,0 @@ -"Test harness for doctests." - -# pylint: disable-msg=E0611,W0142 - -__metaclass__ = type -__all__ = [ - 'additional_tests', - ] - -import atexit -import doctest -import os -#from pkg_resources import ( -# resource_filename, resource_exists, resource_listdir, cleanup_resources) -import unittest - -DOCTEST_FLAGS = ( - doctest.ELLIPSIS | - doctest.NORMALIZE_WHITESPACE | - doctest.REPORT_NDIFF) - - -# def additional_tests(): -# "Run the doc tests (README.txt and docs/*, if any exist)" -# doctest_files = [ -# os.path.abspath(resource_filename('bs4', 'README.txt'))] -# if resource_exists('bs4', 'docs'): -# for name in resource_listdir('bs4', 'docs'): -# if name.endswith('.txt'): -# doctest_files.append( -# os.path.abspath( -# resource_filename('bs4', 'docs/%s' % name))) -# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS) -# atexit.register(cleanup_resources) -# return unittest.TestSuite(( -# doctest.DocFileSuite(*doctest_files, **kwargs))) diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py deleted file mode 100644 index f195f7d0..00000000 --- a/bs4/tests/test_html5lib.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Tests to ensure that the html5lib tree builder generates good trees.""" - -import warnings - -try: - from bs4.builder import HTML5TreeBuilder - HTML5LIB_PRESENT = True -except ImportError, e: - HTML5LIB_PRESENT = False -from bs4.element import SoupStrainer -from bs4.testing import ( - HTML5TreeBuilderSmokeTest, - SoupTest, - skipIf, -) - -@skipIf( - not HTML5LIB_PRESENT, - "html5lib seems not to be present, not testing its tree builder.") -class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): - """See ``HTML5TreeBuilderSmokeTest``.""" - - @property - def default_builder(self): - return HTML5TreeBuilder() - - def test_soupstrainer(self): - # The html5lib tree builder does not support SoupStrainers. - strainer = SoupStrainer("b") - markup = "

A bold statement.

" - with warnings.catch_warnings(record=True) as w: - soup = self.soup(markup, parse_only=strainer) - self.assertEqual( - soup.decode(), self.document_for(markup)) - - self.assertTrue( - "the html5lib tree builder doesn't support parse_only" in - str(w[0].message)) - - def test_correctly_nested_tables(self): - """html5lib inserts tags where other parsers don't.""" - markup = ('' - '' - "') - - self.assertSoupEquals( - markup, - '
Here's another table:" - '' - '' - '
foo
Here\'s another table:' - '
foo
' - '
') - - self.assertSoupEquals( - "" - "" - "
Foo
Bar
Baz
") diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py deleted file mode 100644 index bcb5ed23..00000000 --- a/bs4/tests/test_htmlparser.py +++ /dev/null @@ -1,19 +0,0 @@ -"""Tests to ensure that the html.parser tree builder generates good -trees.""" - -from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest -from bs4.builder import HTMLParserTreeBuilder - -class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): - - @property - def default_builder(self): - return HTMLParserTreeBuilder() - - def test_namespaced_system_doctype(self): - # html.parser can't handle namespaced doctypes, so skip this one. - pass - - def test_namespaced_public_doctype(self): - # html.parser can't handle namespaced doctypes, so skip this one. - pass diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py deleted file mode 100644 index 39e26bfb..00000000 --- a/bs4/tests/test_lxml.py +++ /dev/null @@ -1,75 +0,0 @@ -"""Tests to ensure that the lxml tree builder generates good trees.""" - -import re -import warnings - -try: - from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML - LXML_PRESENT = True -except ImportError, e: - LXML_PRESENT = False - -from bs4 import ( - BeautifulSoup, - BeautifulStoneSoup, - ) -from bs4.element import Comment, Doctype, SoupStrainer -from bs4.testing import skipIf -from bs4.tests import test_htmlparser -from bs4.testing import ( - HTMLTreeBuilderSmokeTest, - XMLTreeBuilderSmokeTest, - SoupTest, - skipIf, -) - -@skipIf( - not LXML_PRESENT, - "lxml seems not to be present, not testing its tree builder.") -class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): - """See ``HTMLTreeBuilderSmokeTest``.""" - - @property - def default_builder(self): - return LXMLTreeBuilder() - - def test_out_of_range_entity(self): - self.assertSoupEquals( - "

foo�bar

", "

foobar

") - self.assertSoupEquals( - "

foo�bar

", "

foobar

") - self.assertSoupEquals( - "

foo�bar

", "

foobar

") - - def test_beautifulstonesoup_is_xml_parser(self): - # Make sure that the deprecated BSS class uses an xml builder - # if one is installed. - with warnings.catch_warnings(record=False) as w: - soup = BeautifulStoneSoup("") - self.assertEqual(u"", unicode(soup.b)) - - def test_real_xhtml_document(self): - """lxml strips the XML definition from an XHTML doc, which is fine.""" - markup = b""" - - -Hello. -Goodbye. -""" - soup = self.soup(markup) - self.assertEqual( - soup.encode("utf-8").replace(b"\n", b''), - markup.replace(b'\n', b'').replace( - b'', b'')) - - -@skipIf( - not LXML_PRESENT, - "lxml seems not to be present, not testing its XML tree builder.") -class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest): - """See ``HTMLTreeBuilderSmokeTest``.""" - - @property - def default_builder(self): - return LXMLTreeBuilderForXML() - diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py deleted file mode 100644 index 23a664e7..00000000 --- a/bs4/tests/test_soup.py +++ /dev/null @@ -1,368 +0,0 @@ -# -*- coding: utf-8 -*- -"""Tests of Beautiful Soup as a whole.""" - -import unittest -from bs4 import ( - BeautifulSoup, - BeautifulStoneSoup, -) -from bs4.element import ( - CharsetMetaAttributeValue, - ContentMetaAttributeValue, - SoupStrainer, - NamespacedAttribute, - ) -import bs4.dammit -from bs4.dammit import EntitySubstitution, UnicodeDammit -from bs4.testing import ( - SoupTest, - skipIf, -) -import warnings - -try: - from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML - LXML_PRESENT = True -except ImportError, e: - LXML_PRESENT = False - -class TestDeprecatedConstructorArguments(SoupTest): - - def test_parseOnlyThese_renamed_to_parse_only(self): - with warnings.catch_warnings(record=True) as w: - soup = self.soup("", parseOnlyThese=SoupStrainer("b")) - msg = str(w[0].message) - self.assertTrue("parseOnlyThese" in msg) - self.assertTrue("parse_only" in msg) - self.assertEqual(b"", soup.encode()) - - def test_fromEncoding_renamed_to_from_encoding(self): - with warnings.catch_warnings(record=True) as w: - utf8 = b"\xc3\xa9" - soup = self.soup(utf8, fromEncoding="utf8") - msg = str(w[0].message) - self.assertTrue("fromEncoding" in msg) - self.assertTrue("from_encoding" in msg) - self.assertEqual("utf8", soup.original_encoding) - - def test_unrecognized_keyword_argument(self): - self.assertRaises( - TypeError, self.soup, "", no_such_argument=True) - - @skipIf( - not LXML_PRESENT, - "lxml not present, not testing BeautifulStoneSoup.") - def test_beautifulstonesoup(self): - with warnings.catch_warnings(record=True) as w: - soup = BeautifulStoneSoup("") - self.assertTrue(isinstance(soup, BeautifulSoup)) - self.assertTrue("BeautifulStoneSoup class is deprecated") - -class TestSelectiveParsing(SoupTest): - - def test_parse_with_soupstrainer(self): - markup = "NoYesNoYes Yes" - strainer = SoupStrainer("b") - soup = self.soup(markup, parse_only=strainer) - self.assertEqual(soup.encode(), b"YesYes Yes") - - -class TestEntitySubstitution(unittest.TestCase): - """Standalone tests of the EntitySubstitution class.""" - def setUp(self): - self.sub = EntitySubstitution - - def test_simple_html_substitution(self): - # Unicode characters corresponding to named HTML entites - # are substituted, and no others. - s = u"foo\u2200\N{SNOWMAN}\u00f5bar" - self.assertEqual(self.sub.substitute_html(s), - u"foo∀\N{SNOWMAN}õbar") - - def test_smart_quote_substitution(self): - # MS smart quotes are a common source of frustration, so we - # give them a special test. - quotes = b"\x91\x92foo\x93\x94" - dammit = UnicodeDammit(quotes) - self.assertEqual(self.sub.substitute_html(dammit.markup), - "‘’foo“”") - - def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self): - s = 'Welcome to "my bar"' - self.assertEqual(self.sub.substitute_xml(s, False), s) - - def test_xml_attribute_quoting_normally_uses_double_quotes(self): - self.assertEqual(self.sub.substitute_xml("Welcome", True), - '"Welcome"') - self.assertEqual(self.sub.substitute_xml("Bob's Bar", True), - '"Bob\'s Bar"') - - def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self): - s = 'Welcome to "my bar"' - self.assertEqual(self.sub.substitute_xml(s, True), - "'Welcome to \"my bar\"'") - - def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self): - s = 'Welcome to "Bob\'s Bar"' - self.assertEqual( - self.sub.substitute_xml(s, True), - '"Welcome to "Bob\'s Bar""') - - def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self): - quoted = 'Welcome to "Bob\'s Bar"' - self.assertEqual(self.sub.substitute_xml(quoted), quoted) - - def test_xml_quoting_handles_angle_brackets(self): - self.assertEqual( - self.sub.substitute_xml("foo"), - "foo<bar>") - - def test_xml_quoting_handles_ampersands(self): - self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&T") - - def test_xml_quoting_ignores_ampersands_when_they_are_part_of_an_entity(self): - self.assertEqual( - self.sub.substitute_xml("ÁT&T"), - "ÁT&T") - - def test_quotes_not_html_substituted(self): - """There's no need to do this except inside attribute values.""" - text = 'Bob\'s "bar"' - self.assertEqual(self.sub.substitute_html(text), text) - - -class TestEncodingConversion(SoupTest): - # Test Beautiful Soup's ability to decode and encode from various - # encodings. - - def setUp(self): - super(TestEncodingConversion, self).setUp() - self.unicode_data = u"Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!" - self.utf8_data = self.unicode_data.encode("utf-8") - # Just so you know what it looks like. - self.assertEqual( - self.utf8_data, - b"Sacr\xc3\xa9 bleu!") - - def test_ascii_in_unicode_out(self): - # ASCII input is converted to Unicode. The original_encoding - # attribute is set. - ascii = b"a" - soup_from_ascii = self.soup(ascii) - unicode_output = soup_from_ascii.decode() - self.assertTrue(isinstance(unicode_output, unicode)) - self.assertEqual(unicode_output, self.document_for(ascii.decode())) - self.assertEqual(soup_from_ascii.original_encoding, "ascii") - - def test_unicode_in_unicode_out(self): - # Unicode input is left alone. The original_encoding attribute - # is not set. - soup_from_unicode = self.soup(self.unicode_data) - self.assertEqual(soup_from_unicode.decode(), self.unicode_data) - self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!') - self.assertEqual(soup_from_unicode.original_encoding, None) - - def test_utf8_in_unicode_out(self): - # UTF-8 input is converted to Unicode. The original_encoding - # attribute is set. - soup_from_utf8 = self.soup(self.utf8_data) - self.assertEqual(soup_from_utf8.decode(), self.unicode_data) - self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!') - - def test_utf8_out(self): - # The internal data structures can be encoded as UTF-8. - soup_from_unicode = self.soup(self.unicode_data) - self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data) - - -class TestUnicodeDammit(unittest.TestCase): - """Standalone tests of Unicode, Dammit.""" - - def test_smart_quotes_to_unicode(self): - markup = b"\x91\x92\x93\x94" - dammit = UnicodeDammit(markup) - self.assertEqual( - dammit.unicode_markup, u"\u2018\u2019\u201c\u201d") - - def test_smart_quotes_to_xml_entities(self): - markup = b"\x91\x92\x93\x94" - dammit = UnicodeDammit(markup, smart_quotes_to="xml") - self.assertEqual( - dammit.unicode_markup, "‘’“”") - - def test_smart_quotes_to_html_entities(self): - markup = b"\x91\x92\x93\x94" - dammit = UnicodeDammit(markup, smart_quotes_to="html") - self.assertEqual( - dammit.unicode_markup, "‘’“”") - - def test_smart_quotes_to_ascii(self): - markup = b"\x91\x92\x93\x94" - dammit = UnicodeDammit(markup, smart_quotes_to="ascii") - self.assertEqual( - dammit.unicode_markup, """''""""") - - def test_detect_utf8(self): - utf8 = b"\xc3\xa9" - dammit = UnicodeDammit(utf8) - self.assertEqual(dammit.unicode_markup, u'\xe9') - self.assertEqual(dammit.original_encoding, 'utf-8') - - def test_convert_hebrew(self): - hebrew = b"\xed\xe5\xec\xf9" - dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) - self.assertEqual(dammit.original_encoding, 'iso-8859-8') - self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9') - - def test_dont_see_smart_quotes_where_there_are_none(self): - utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch" - dammit = UnicodeDammit(utf_8) - self.assertEqual(dammit.original_encoding, 'utf-8') - self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8) - - def test_ignore_inappropriate_codecs(self): - utf8_data = u"Räksmörgås".encode("utf-8") - dammit = UnicodeDammit(utf8_data, ["iso-8859-8"]) - self.assertEqual(dammit.original_encoding, 'utf-8') - - def test_ignore_invalid_codecs(self): - utf8_data = u"Räksmörgås".encode("utf-8") - for bad_encoding in ['.utf8', '...', 'utF---16.!']: - dammit = UnicodeDammit(utf8_data, [bad_encoding]) - self.assertEqual(dammit.original_encoding, 'utf-8') - - def test_detect_html5_style_meta_tag(self): - - for data in ( - b'', - b"", - b"", - b""): - dammit = UnicodeDammit(data, is_html=True) - self.assertEqual( - "euc-jp", dammit.original_encoding) - - def test_last_ditch_entity_replacement(self): - # This is a UTF-8 document that contains bytestrings - # completely incompatible with UTF-8 (ie. encoded with some other - # encoding). - # - # Since there is no consistent encoding for the document, - # Unicode, Dammit will eventually encode the document as UTF-8 - # and encode the incompatible characters as REPLACEMENT - # CHARACTER. - # - # If chardet is installed, it will detect that the document - # can be converted into ISO-8859-1 without errors. This happens - # to be the wrong encoding, but it is a consistent encoding, so the - # code we're testing here won't run. - # - # So we temporarily disable chardet if it's present. - doc = b"""\357\273\277 -\330\250\330\252\330\261 -\310\322\321\220\312\321\355\344""" - chardet = bs4.dammit.chardet - try: - bs4.dammit.chardet = None - with warnings.catch_warnings(record=True) as w: - dammit = UnicodeDammit(doc) - self.assertEqual(True, dammit.contains_replacement_characters) - self.assertTrue(u"\ufffd" in dammit.unicode_markup) - - soup = BeautifulSoup(doc, "html.parser") - self.assertTrue(soup.contains_replacement_characters) - - msg = w[0].message - self.assertTrue(isinstance(msg, UnicodeWarning)) - self.assertTrue("Some characters could not be decoded" in str(msg)) - finally: - bs4.dammit.chardet = chardet - - def test_sniffed_xml_encoding(self): - # A document written in UTF-16LE will be converted by a different - # code path that sniffs the byte order markers. - data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00' - dammit = UnicodeDammit(data) - self.assertEqual(u"áé", dammit.unicode_markup) - self.assertEqual("utf-16le", dammit.original_encoding) - - def test_detwingle(self): - # Here's a UTF8 document. - utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8") - - # Here's a Windows-1252 document. - windows_1252 = ( - u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!" - u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252") - - # Through some unholy alchemy, they've been stuck together. - doc = utf8 + windows_1252 + utf8 - - # The document can't be turned into UTF-8: - self.assertRaises(UnicodeDecodeError, doc.decode, "utf8") - - # Unicode, Dammit thinks the whole document is Windows-1252, - # and decodes it into "☃☃☃“Hi, I like Windows!”☃☃☃" - - # But if we run it through fix_embedded_windows_1252, it's fixed: - - fixed = UnicodeDammit.detwingle(doc) - self.assertEqual( - u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8")) - - def test_detwingle_ignores_multibyte_characters(self): - # Each of these characters has a UTF-8 representation ending - # in \x93. \x93 is a smart quote if interpreted as - # Windows-1252. But our code knows to skip over multibyte - # UTF-8 characters, so they'll survive the process unscathed. - for tricky_unicode_char in ( - u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93' - u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93' - u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one. - ): - input = tricky_unicode_char.encode("utf8") - self.assertTrue(input.endswith(b'\x93')) - output = UnicodeDammit.detwingle(input) - self.assertEqual(output, input) - -class TestNamedspacedAttribute(SoupTest): - - def test_name_may_be_none(self): - a = NamespacedAttribute("xmlns", None) - self.assertEqual(a, "xmlns") - - def test_attribute_is_equivalent_to_colon_separated_string(self): - a = NamespacedAttribute("a", "b") - self.assertEqual("a:b", a) - - def test_attributes_are_equivalent_if_prefix_and_name_identical(self): - a = NamespacedAttribute("a", "b", "c") - b = NamespacedAttribute("a", "b", "c") - self.assertEqual(a, b) - - # The actual namespace is not considered. - c = NamespacedAttribute("a", "b", None) - self.assertEqual(a, c) - - # But name and prefix are important. - d = NamespacedAttribute("a", "z", "c") - self.assertNotEqual(a, d) - - e = NamespacedAttribute("z", "b", "c") - self.assertNotEqual(a, e) - - -class TestAttributeValueWithCharsetSubstitution(unittest.TestCase): - - def test_content_meta_attribute_value(self): - value = CharsetMetaAttributeValue("euc-jp") - self.assertEqual("euc-jp", value) - self.assertEqual("euc-jp", value.original_value) - self.assertEqual("utf8", value.encode("utf8")) - - - def test_content_meta_attribute_value(self): - value = ContentMetaAttributeValue("text/html; charset=euc-jp") - self.assertEqual("text/html; charset=euc-jp", value) - self.assertEqual("text/html; charset=euc-jp", value.original_value) - self.assertEqual("text/html; charset=utf8", value.encode("utf8")) diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py deleted file mode 100644 index cc573ede..00000000 --- a/bs4/tests/test_tree.py +++ /dev/null @@ -1,1695 +0,0 @@ -# -*- coding: utf-8 -*- -"""Tests for Beautiful Soup's tree traversal methods. - -The tree traversal methods are the main advantage of using Beautiful -Soup over just using a parser. - -Different parsers will build different Beautiful Soup trees given the -same markup, but all Beautiful Soup trees can be traversed with the -methods tested here. -""" - -import copy -import pickle -import re -import warnings -from bs4 import BeautifulSoup -from bs4.builder import ( - builder_registry, - HTMLParserTreeBuilder, -) -from bs4.element import ( - CData, - Doctype, - NavigableString, - SoupStrainer, - Tag, -) -from bs4.testing import ( - SoupTest, - skipIf, -) - -XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None) -LXML_PRESENT = (builder_registry.lookup("lxml") is not None) - -class TreeTest(SoupTest): - - def assertSelects(self, tags, should_match): - """Make sure that the given tags have the correct text. - - This is used in tests that define a bunch of tags, each - containing a single string, and then select certain strings by - some mechanism. - """ - self.assertEqual([tag.string for tag in tags], should_match) - - def assertSelectsIDs(self, tags, should_match): - """Make sure that the given tags have the correct IDs. - - This is used in tests that define a bunch of tags, each - containing a single string, and then select certain strings by - some mechanism. - """ - self.assertEqual([tag['id'] for tag in tags], should_match) - - -class TestFind(TreeTest): - """Basic tests of the find() method. - - find() just calls find_all() with limit=1, so it's not tested all - that thouroughly here. - """ - - def test_find_tag(self): - soup = self.soup("1234") - self.assertEqual(soup.find("b").string, "2") - - def test_unicode_text_find(self): - soup = self.soup(u'

Räksmörgås

') - self.assertEqual(soup.find(text=u'Räksmörgås'), u'Räksmörgås') - -class TestFindAll(TreeTest): - """Basic tests of the find_all() method.""" - - def test_find_all_text_nodes(self): - """You can search the tree for text nodes.""" - soup = self.soup("Foobar\xbb") - # Exact match. - self.assertEqual(soup.find_all(text="bar"), [u"bar"]) - # Match any of a number of strings. - self.assertEqual( - soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"]) - # Match a regular expression. - self.assertEqual(soup.find_all(text=re.compile('.*')), - [u"Foo", u"bar", u'\xbb']) - # Match anything. - self.assertEqual(soup.find_all(text=True), - [u"Foo", u"bar", u'\xbb']) - - def test_find_all_limit(self): - """You can limit the number of items returned by find_all.""" - soup = self.soup("12345") - self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"]) - self.assertSelects(soup.find_all('a', limit=1), ["1"]) - self.assertSelects( - soup.find_all('a', limit=10), ["1", "2", "3", "4", "5"]) - - # A limit of 0 means no limit. - self.assertSelects( - soup.find_all('a', limit=0), ["1", "2", "3", "4", "5"]) - - def test_calling_a_tag_is_calling_findall(self): - soup = self.soup("123") - self.assertSelects(soup('a', limit=1), ["1"]) - self.assertSelects(soup.b(id="foo"), ["3"]) - - def test_find_all_with_self_referential_data_structure_does_not_cause_infinite_recursion(self): - soup = self.soup("") - # Create a self-referential list. - l = [] - l.append(l) - - # Without special code in _normalize_search_value, this would cause infinite - # recursion. - self.assertEqual([], soup.find_all(l)) - -class TestFindAllBasicNamespaces(TreeTest): - - def test_find_by_namespaced_name(self): - soup = self.soup('4') - self.assertEqual("4", soup.find("mathml:msqrt").string) - self.assertEqual("a", soup.find(attrs= { "svg:fill" : "red" }).name) - - -class TestFindAllByName(TreeTest): - """Test ways of finding tags by tag name.""" - - def setUp(self): - super(TreeTest, self).setUp() - self.tree = self.soup("""First tag. - Second tag. - Third Nested tag. tag.""") - - def test_find_all_by_tag_name(self): - # Find all the tags. - self.assertSelects( - self.tree.find_all('a'), ['First tag.', 'Nested tag.']) - - def test_find_all_by_name_and_text(self): - self.assertSelects( - self.tree.find_all('a', text='First tag.'), ['First tag.']) - - self.assertSelects( - self.tree.find_all('a', text=True), ['First tag.', 'Nested tag.']) - - self.assertSelects( - self.tree.find_all('a', text=re.compile("tag")), - ['First tag.', 'Nested tag.']) - - - def test_find_all_on_non_root_element(self): - # You can call find_all on any node, not just the root. - self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.']) - - def test_calling_element_invokes_find_all(self): - self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.']) - - def test_find_all_by_tag_strainer(self): - self.assertSelects( - self.tree.find_all(SoupStrainer('a')), - ['First tag.', 'Nested tag.']) - - def test_find_all_by_tag_names(self): - self.assertSelects( - self.tree.find_all(['a', 'b']), - ['First tag.', 'Second tag.', 'Nested tag.']) - - def test_find_all_by_tag_dict(self): - self.assertSelects( - self.tree.find_all({'a' : True, 'b' : True}), - ['First tag.', 'Second tag.', 'Nested tag.']) - - def test_find_all_by_tag_re(self): - self.assertSelects( - self.tree.find_all(re.compile('^[ab]$')), - ['First tag.', 'Second tag.', 'Nested tag.']) - - def test_find_all_with_tags_matching_method(self): - # You can define an oracle method that determines whether - # a tag matches the search. - def id_matches_name(tag): - return tag.name == tag.get('id') - - tree = self.soup("""Match 1. - Does not match. - Match 2.""") - - self.assertSelects( - tree.find_all(id_matches_name), ["Match 1.", "Match 2."]) - - -class TestFindAllByAttribute(TreeTest): - - def test_find_all_by_attribute_name(self): - # You can pass in keyword arguments to find_all to search by - # attribute. - tree = self.soup(""" - Matching a. - - Non-matching Matching b.a. - """) - self.assertSelects(tree.find_all(id='first'), - ["Matching a.", "Matching b."]) - - def test_find_all_by_utf8_attribute_value(self): - peace = u"םולש".encode("utf8") - data = u''.encode("utf8") - soup = self.soup(data) - self.assertEqual([soup.a], soup.find_all(title=peace)) - self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8"))) - self.assertEqual([soup.a], soup.find_all(title=[peace, "something else"])) - - def test_find_all_by_attribute_dict(self): - # You can pass in a dictionary as the argument 'attrs'. This - # lets you search for attributes like 'name' (a fixed argument - # to find_all) and 'class' (a reserved word in Python.) - tree = self.soup(""" - Name match. - Class match. - Non-match. - A tag called 'name1'. - """) - - # This doesn't do what you want. - self.assertSelects(tree.find_all(name='name1'), - ["A tag called 'name1'."]) - # This does what you want. - self.assertSelects(tree.find_all(attrs={'name' : 'name1'}), - ["Name match."]) - - # Passing class='class2' would cause a syntax error. - self.assertSelects(tree.find_all(attrs={'class' : 'class2'}), - ["Class match."]) - - def test_find_all_by_class(self): - # Passing in a string to 'attrs' will search the CSS class. - tree = self.soup(""" - Class 1. - Class 2. - Class 1. - Class 3 and 4. - """) - self.assertSelects(tree.find_all('a', '1'), ['Class 1.']) - self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.']) - self.assertSelects(tree.find_all('c', '3'), ['Class 3 and 4.']) - self.assertSelects(tree.find_all('c', '4'), ['Class 3 and 4.']) - - def test_find_by_class_when_multiple_classes_present(self): - tree = self.soup("Found it") - - attrs = { 'class' : re.compile("o") } - f = tree.find_all("gar", attrs=attrs) - self.assertSelects(f, ["Found it"]) - - f = tree.find_all("gar", re.compile("a")) - self.assertSelects(f, ["Found it"]) - - # Since the class is not the string "foo bar", but the two - # strings "foo" and "bar", this will not find anything. - attrs = { 'class' : re.compile("o b") } - f = tree.find_all("gar", attrs=attrs) - self.assertSelects(f, []) - - def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self): - soup = self.soup("Found it") - - self.assertSelects(soup.find_all("a", re.compile("ba")), ["Found it"]) - - def big_attribute_value(value): - return len(value) > 3 - - self.assertSelects(soup.find_all("a", big_attribute_value), []) - - def small_attribute_value(value): - return len(value) <= 3 - - self.assertSelects( - soup.find_all("a", small_attribute_value), ["Found it"]) - - def test_find_all_with_string_for_attrs_finds_multiple_classes(self): - soup = self.soup('') - a, a2 = soup.find_all("a") - self.assertEqual([a, a2], soup.find_all("a", "foo")) - self.assertEqual([a], soup.find_all("a", "bar")) - - # If you specify the attribute as a string that contains a - # space, only that specific value will be found. - self.assertEqual([a], soup.find_all("a", "foo bar")) - self.assertEqual([], soup.find_all("a", "bar foo")) - - def test_find_all_by_attribute_soupstrainer(self): - tree = self.soup(""" - Match. - Non-match.""") - - strainer = SoupStrainer(attrs={'id' : 'first'}) - self.assertSelects(tree.find_all(strainer), ['Match.']) - - def test_find_all_with_missing_atribute(self): - # You can pass in None as the value of an attribute to find_all. - # This will match tags that do not have that attribute set. - tree = self.soup("""ID present. - No ID present. - ID is empty.""") - self.assertSelects(tree.find_all('a', id=None), ["No ID present."]) - - def test_find_all_with_defined_attribute(self): - # You can pass in None as the value of an attribute to find_all. - # This will match tags that have that attribute set to any value. - tree = self.soup("""ID present. - No ID present. - ID is empty.""") - self.assertSelects( - tree.find_all(id=True), ["ID present.", "ID is empty."]) - - def test_find_all_with_numeric_attribute(self): - # If you search for a number, it's treated as a string. - tree = self.soup("""Unquoted attribute. - Quoted attribute.""") - - expected = ["Unquoted attribute.", "Quoted attribute."] - self.assertSelects(tree.find_all(id=1), expected) - self.assertSelects(tree.find_all(id="1"), expected) - - def test_find_all_with_list_attribute_values(self): - # You can pass a list of attribute values instead of just one, - # and you'll get tags that match any of the values. - tree = self.soup("""1 - 2 - 3 - No ID.""") - self.assertSelects(tree.find_all(id=["1", "3", "4"]), - ["1", "3"]) - - def test_find_all_with_regular_expression_attribute_value(self): - # You can pass a regular expression as an attribute value, and - # you'll get tags whose values for that attribute match the - # regular expression. - tree = self.soup("""One a. - Two as. - Mixed as and bs. - One b. - No ID.""") - - self.assertSelects(tree.find_all(id=re.compile("^a+$")), - ["One a.", "Two as."]) - - def test_find_by_name_and_containing_string(self): - soup = self.soup("foobarfoo") - a = soup.a - - self.assertEqual([a], soup.find_all("a", text="foo")) - self.assertEqual([], soup.find_all("a", text="bar")) - self.assertEqual([], soup.find_all("a", text="bar")) - - def test_find_by_name_and_containing_string_when_string_is_buried(self): - soup = self.soup("foofoo") - self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo")) - - def test_find_by_attribute_and_containing_string(self): - soup = self.soup('foofoo') - a = soup.a - - self.assertEqual([a], soup.find_all(id=2, text="foo")) - self.assertEqual([], soup.find_all(id=1, text="bar")) - - - - -class TestIndex(TreeTest): - """Test Tag.index""" - def test_index(self): - tree = self.soup("""
- Identical - Not identical - Identical - - Identical with child - Also not identical - Identical with child -
""") - div = tree.div - for i, element in enumerate(div.contents): - self.assertEqual(i, div.index(element)) - self.assertRaises(ValueError, tree.index, 1) - - -class TestParentOperations(TreeTest): - """Test navigation and searching through an element's parents.""" - - def setUp(self): - super(TestParentOperations, self).setUp() - self.tree = self.soup('''
    -
      -
        -
          - Start here -
        -
      ''') - self.start = self.tree.b - - - def test_parent(self): - self.assertEqual(self.start.parent['id'], 'bottom') - self.assertEqual(self.start.parent.parent['id'], 'middle') - self.assertEqual(self.start.parent.parent.parent['id'], 'top') - - def test_parent_of_top_tag_is_soup_object(self): - top_tag = self.tree.contents[0] - self.assertEqual(top_tag.parent, self.tree) - - def test_soup_object_has_no_parent(self): - self.assertEqual(None, self.tree.parent) - - def test_find_parents(self): - self.assertSelectsIDs( - self.start.find_parents('ul'), ['bottom', 'middle', 'top']) - self.assertSelectsIDs( - self.start.find_parents('ul', id="middle"), ['middle']) - - def test_find_parent(self): - self.assertEqual(self.start.find_parent('ul')['id'], 'bottom') - - def test_parent_of_text_element(self): - text = self.tree.find(text="Start here") - self.assertEqual(text.parent.name, 'b') - - def test_text_element_find_parent(self): - text = self.tree.find(text="Start here") - self.assertEqual(text.find_parent('ul')['id'], 'bottom') - - def test_parent_generator(self): - parents = [parent['id'] for parent in self.start.parents - if parent is not None and 'id' in parent.attrs] - self.assertEqual(parents, ['bottom', 'middle', 'top']) - - -class ProximityTest(TreeTest): - - def setUp(self): - super(TreeTest, self).setUp() - self.tree = self.soup( - 'OneTwoThree') - - -class TestNextOperations(ProximityTest): - - def setUp(self): - super(TestNextOperations, self).setUp() - self.start = self.tree.b - - def test_next(self): - self.assertEqual(self.start.next_element, "One") - self.assertEqual(self.start.next_element.next_element['id'], "2") - - def test_next_of_last_item_is_none(self): - last = self.tree.find(text="Three") - self.assertEqual(last.next_element, None) - - def test_next_of_root_is_none(self): - # The document root is outside the next/previous chain. - self.assertEqual(self.tree.next_element, None) - - def test_find_all_next(self): - self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"]) - self.start.find_all_next(id=3) - self.assertSelects(self.start.find_all_next(id=3), ["Three"]) - - def test_find_next(self): - self.assertEqual(self.start.find_next('b')['id'], '2') - self.assertEqual(self.start.find_next(text="Three"), "Three") - - def test_find_next_for_text_element(self): - text = self.tree.find(text="One") - self.assertEqual(text.find_next("b").string, "Two") - self.assertSelects(text.find_all_next("b"), ["Two", "Three"]) - - def test_next_generator(self): - start = self.tree.find(text="Two") - successors = [node for node in start.next_elements] - # There are two successors: the final tag and its text contents. - tag, contents = successors - self.assertEqual(tag['id'], '3') - self.assertEqual(contents, "Three") - -class TestPreviousOperations(ProximityTest): - - def setUp(self): - super(TestPreviousOperations, self).setUp() - self.end = self.tree.find(text="Three") - - def test_previous(self): - self.assertEqual(self.end.previous_element['id'], "3") - self.assertEqual(self.end.previous_element.previous_element, "Two") - - def test_previous_of_first_item_is_none(self): - first = self.tree.find('html') - self.assertEqual(first.previous_element, None) - - def test_previous_of_root_is_none(self): - # The document root is outside the next/previous chain. - # XXX This is broken! - #self.assertEqual(self.tree.previous_element, None) - pass - - def test_find_all_previous(self): - # The tag containing the "Three" node is the predecessor - # of the "Three" node itself, which is why "Three" shows up - # here. - self.assertSelects( - self.end.find_all_previous('b'), ["Three", "Two", "One"]) - self.assertSelects(self.end.find_all_previous(id=1), ["One"]) - - def test_find_previous(self): - self.assertEqual(self.end.find_previous('b')['id'], '3') - self.assertEqual(self.end.find_previous(text="One"), "One") - - def test_find_previous_for_text_element(self): - text = self.tree.find(text="Three") - self.assertEqual(text.find_previous("b").string, "Three") - self.assertSelects( - text.find_all_previous("b"), ["Three", "Two", "One"]) - - def test_previous_generator(self): - start = self.tree.find(text="One") - predecessors = [node for node in start.previous_elements] - - # There are four predecessors: the tag containing "One" - # the tag, the tag, and the tag. - b, body, head, html = predecessors - self.assertEqual(b['id'], '1') - self.assertEqual(body.name, "body") - self.assertEqual(head.name, "head") - self.assertEqual(html.name, "html") - - -class SiblingTest(TreeTest): - - def setUp(self): - super(SiblingTest, self).setUp() - markup = ''' - - - - - - - - - - - ''' - # All that whitespace looks good but makes the tests more - # difficult. Get rid of it. - markup = re.compile("\n\s*").sub("", markup) - self.tree = self.soup(markup) - - -class TestNextSibling(SiblingTest): - - def setUp(self): - super(TestNextSibling, self).setUp() - self.start = self.tree.find(id="1") - - def test_next_sibling_of_root_is_none(self): - self.assertEqual(self.tree.next_sibling, None) - - def test_next_sibling(self): - self.assertEqual(self.start.next_sibling['id'], '2') - self.assertEqual(self.start.next_sibling.next_sibling['id'], '3') - - # Note the difference between next_sibling and next_element. - self.assertEqual(self.start.next_element['id'], '1.1') - - def test_next_sibling_may_not_exist(self): - self.assertEqual(self.tree.html.next_sibling, None) - - nested_span = self.tree.find(id="1.1") - self.assertEqual(nested_span.next_sibling, None) - - last_span = self.tree.find(id="4") - self.assertEqual(last_span.next_sibling, None) - - def test_find_next_sibling(self): - self.assertEqual(self.start.find_next_sibling('span')['id'], '2') - - def test_next_siblings(self): - self.assertSelectsIDs(self.start.find_next_siblings("span"), - ['2', '3', '4']) - - self.assertSelectsIDs(self.start.find_next_siblings(id='3'), ['3']) - - def test_next_sibling_for_text_element(self): - soup = self.soup("Foobarbaz") - start = soup.find(text="Foo") - self.assertEqual(start.next_sibling.name, 'b') - self.assertEqual(start.next_sibling.next_sibling, 'baz') - - self.assertSelects(start.find_next_siblings('b'), ['bar']) - self.assertEqual(start.find_next_sibling(text="baz"), "baz") - self.assertEqual(start.find_next_sibling(text="nonesuch"), None) - - -class TestPreviousSibling(SiblingTest): - - def setUp(self): - super(TestPreviousSibling, self).setUp() - self.end = self.tree.find(id="4") - - def test_previous_sibling_of_root_is_none(self): - self.assertEqual(self.tree.previous_sibling, None) - - def test_previous_sibling(self): - self.assertEqual(self.end.previous_sibling['id'], '3') - self.assertEqual(self.end.previous_sibling.previous_sibling['id'], '2') - - # Note the difference between previous_sibling and previous_element. - self.assertEqual(self.end.previous_element['id'], '3.1') - - def test_previous_sibling_may_not_exist(self): - self.assertEqual(self.tree.html.previous_sibling, None) - - nested_span = self.tree.find(id="1.1") - self.assertEqual(nested_span.previous_sibling, None) - - first_span = self.tree.find(id="1") - self.assertEqual(first_span.previous_sibling, None) - - def test_find_previous_sibling(self): - self.assertEqual(self.end.find_previous_sibling('span')['id'], '3') - - def test_previous_siblings(self): - self.assertSelectsIDs(self.end.find_previous_siblings("span"), - ['3', '2', '1']) - - self.assertSelectsIDs(self.end.find_previous_siblings(id='1'), ['1']) - - def test_previous_sibling_for_text_element(self): - soup = self.soup("Foobarbaz") - start = soup.find(text="baz") - self.assertEqual(start.previous_sibling.name, 'b') - self.assertEqual(start.previous_sibling.previous_sibling, 'Foo') - - self.assertSelects(start.find_previous_siblings('b'), ['bar']) - self.assertEqual(start.find_previous_sibling(text="Foo"), "Foo") - self.assertEqual(start.find_previous_sibling(text="nonesuch"), None) - - -class TestTagCreation(SoupTest): - """Test the ability to create new tags.""" - def test_new_tag(self): - soup = self.soup("") - new_tag = soup.new_tag("foo", bar="baz") - self.assertTrue(isinstance(new_tag, Tag)) - self.assertEqual("foo", new_tag.name) - self.assertEqual(dict(bar="baz"), new_tag.attrs) - self.assertEqual(None, new_tag.parent) - - def test_tag_inherits_self_closing_rules_from_builder(self): - if XML_BUILDER_PRESENT: - xml_soup = BeautifulSoup("", "xml") - xml_br = xml_soup.new_tag("br") - xml_p = xml_soup.new_tag("p") - - # Both the
      and

      tag are empty-element, just because - # they have no contents. - self.assertEqual(b"
      ", xml_br.encode()) - self.assertEqual(b"

      ", xml_p.encode()) - - html_soup = BeautifulSoup("", "html") - html_br = html_soup.new_tag("br") - html_p = html_soup.new_tag("p") - - # The HTML builder users HTML's rules about which tags are - # empty-element tags, and the new tags reflect these rules. - self.assertEqual(b"
      ", html_br.encode()) - self.assertEqual(b"

      ", html_p.encode()) - - def test_new_string_creates_navigablestring(self): - soup = self.soup("") - s = soup.new_string("foo") - self.assertEqual("foo", s) - self.assertTrue(isinstance(s, NavigableString)) - -class TestTreeModification(SoupTest): - - def test_attribute_modification(self): - soup = self.soup('') - soup.a['id'] = 2 - self.assertEqual(soup.decode(), self.document_for('')) - del(soup.a['id']) - self.assertEqual(soup.decode(), self.document_for('')) - soup.a['id2'] = 'foo' - self.assertEqual(soup.decode(), self.document_for('')) - - def test_new_tag_creation(self): - builder = builder_registry.lookup('html')() - soup = self.soup("", builder=builder) - a = Tag(soup, builder, 'a') - ol = Tag(soup, builder, 'ol') - a['href'] = 'http://foo.com/' - soup.body.insert(0, a) - soup.body.insert(1, ol) - self.assertEqual( - soup.body.encode(), - b'
        ') - - def test_append_to_contents_moves_tag(self): - doc = """

        Don't leave me here.

        -

        Don\'t leave!

        """ - soup = self.soup(doc) - second_para = soup.find(id='2') - bold = soup.b - - # Move the tag to the end of the second paragraph. - soup.find(id='2').append(soup.b) - - # The tag is now a child of the second paragraph. - self.assertEqual(bold.parent, second_para) - - self.assertEqual( - soup.decode(), self.document_for( - '

        Don\'t leave me .

        \n' - '

        Don\'t leave!here

        ')) - - def test_replace_with_returns_thing_that_was_replaced(self): - text = "" - soup = self.soup(text) - a = soup.a - new_a = a.replace_with(soup.c) - self.assertEqual(a, new_a) - - def test_unwrap_returns_thing_that_was_replaced(self): - text = "" - soup = self.soup(text) - a = soup.a - new_a = a.unwrap() - self.assertEqual(a, new_a) - - def test_replace_tag_with_itself(self): - text = "Foo" - soup = self.soup(text) - c = soup.c - soup.c.replace_with(c) - self.assertEqual(soup.decode(), self.document_for(text)) - - def test_replace_tag_with_its_parent_raises_exception(self): - text = "" - soup = self.soup(text) - self.assertRaises(ValueError, soup.b.replace_with, soup.a) - - def test_insert_tag_into_itself_raises_exception(self): - text = "" - soup = self.soup(text) - self.assertRaises(ValueError, soup.a.insert, 0, soup.a) - - def test_replace_with_maintains_next_element_throughout(self): - soup = self.soup('

        onethree

        ') - a = soup.a - b = a.contents[0] - # Make it so the tag has two text children. - a.insert(1, "two") - - # Now replace each one with the empty string. - left, right = a.contents - left.replaceWith('') - right.replaceWith('') - - # The tag is still connected to the tree. - self.assertEqual("three", soup.b.string) - - def test_replace_final_node(self): - soup = self.soup("Argh!") - soup.find(text="Argh!").replace_with("Hooray!") - new_text = soup.find(text="Hooray!") - b = soup.b - self.assertEqual(new_text.previous_element, b) - self.assertEqual(new_text.parent, b) - self.assertEqual(new_text.previous_element.next_element, new_text) - self.assertEqual(new_text.next_element, None) - - def test_consecutive_text_nodes(self): - # A builder should never create two consecutive text nodes, - # but if you insert one next to another, Beautiful Soup will - # handle it correctly. - soup = self.soup("Argh!") - soup.b.insert(1, "Hooray!") - - self.assertEqual( - soup.decode(), self.document_for( - "Argh!Hooray!")) - - new_text = soup.find(text="Hooray!") - self.assertEqual(new_text.previous_element, "Argh!") - self.assertEqual(new_text.previous_element.next_element, new_text) - - self.assertEqual(new_text.previous_sibling, "Argh!") - self.assertEqual(new_text.previous_sibling.next_sibling, new_text) - - self.assertEqual(new_text.next_sibling, None) - self.assertEqual(new_text.next_element, soup.c) - - def test_insert_string(self): - soup = self.soup("") - soup.a.insert(0, "bar") - soup.a.insert(0, "foo") - # The string were added to the tag. - self.assertEqual(["foo", "bar"], soup.a.contents) - # And they were converted to NavigableStrings. - self.assertEqual(soup.a.contents[0].next_element, "bar") - - def test_insert_tag(self): - builder = self.default_builder - soup = self.soup( - "Findlady!", builder=builder) - magic_tag = Tag(soup, builder, 'magictag') - magic_tag.insert(0, "the") - soup.a.insert(1, magic_tag) - - self.assertEqual( - soup.decode(), self.document_for( - "Findthelady!")) - - # Make sure all the relationships are hooked up correctly. - b_tag = soup.b - self.assertEqual(b_tag.next_sibling, magic_tag) - self.assertEqual(magic_tag.previous_sibling, b_tag) - - find = b_tag.find(text="Find") - self.assertEqual(find.next_element, magic_tag) - self.assertEqual(magic_tag.previous_element, find) - - c_tag = soup.c - self.assertEqual(magic_tag.next_sibling, c_tag) - self.assertEqual(c_tag.previous_sibling, magic_tag) - - the = magic_tag.find(text="the") - self.assertEqual(the.parent, magic_tag) - self.assertEqual(the.next_element, c_tag) - self.assertEqual(c_tag.previous_element, the) - - def test_append_child_thats_already_at_the_end(self): - data = "" - soup = self.soup(data) - soup.a.append(soup.b) - self.assertEqual(data, soup.decode()) - - def test_move_tag_to_beginning_of_parent(self): - data = "" - soup = self.soup(data) - soup.a.insert(0, soup.d) - self.assertEqual("", soup.decode()) - - def test_insert_works_on_empty_element_tag(self): - # This is a little strange, since most HTML parsers don't allow - # markup like this to come through. But in general, we don't - # know what the parser would or wouldn't have allowed, so - # I'm letting this succeed for now. - soup = self.soup("
        ") - soup.br.insert(1, "Contents") - self.assertEqual(str(soup.br), "
        Contents
        ") - - def test_insert_before(self): - soup = self.soup("foobar") - soup.b.insert_before("BAZ") - soup.a.insert_before("QUUX") - self.assertEqual( - soup.decode(), self.document_for("QUUXfooBAZbar")) - - soup.a.insert_before(soup.b) - self.assertEqual( - soup.decode(), self.document_for("QUUXbarfooBAZ")) - - def test_insert_after(self): - soup = self.soup("foobar") - soup.b.insert_after("BAZ") - soup.a.insert_after("QUUX") - self.assertEqual( - soup.decode(), self.document_for("fooQUUXbarBAZ")) - soup.b.insert_after(soup.a) - self.assertEqual( - soup.decode(), self.document_for("QUUXbarfooBAZ")) - - def test_insert_after_raises_valueerror_if_after_has_no_meaning(self): - soup = self.soup("") - tag = soup.new_tag("a") - string = soup.new_string("") - self.assertRaises(ValueError, string.insert_after, tag) - self.assertRaises(ValueError, soup.insert_after, tag) - self.assertRaises(ValueError, tag.insert_after, tag) - - def test_insert_before_raises_valueerror_if_before_has_no_meaning(self): - soup = self.soup("") - tag = soup.new_tag("a") - string = soup.new_string("") - self.assertRaises(ValueError, string.insert_before, tag) - self.assertRaises(ValueError, soup.insert_before, tag) - self.assertRaises(ValueError, tag.insert_before, tag) - - def test_replace_with(self): - soup = self.soup( - "

        There's no business like show business

        ") - no, show = soup.find_all('b') - show.replace_with(no) - self.assertEqual( - soup.decode(), - self.document_for( - "

        There's business like no business

        ")) - - self.assertEqual(show.parent, None) - self.assertEqual(no.parent, soup.p) - self.assertEqual(no.next_element, "no") - self.assertEqual(no.next_sibling, " business") - - def test_replace_first_child(self): - data = "" - soup = self.soup(data) - soup.b.replace_with(soup.c) - self.assertEqual("", soup.decode()) - - def test_replace_last_child(self): - data = "" - soup = self.soup(data) - soup.c.replace_with(soup.b) - self.assertEqual("", soup.decode()) - - def test_nested_tag_replace_with(self): - soup = self.soup( - """Wereservetherighttorefuseservice""") - - # Replace the entire tag and its contents ("reserve the - # right") with the tag ("refuse"). - remove_tag = soup.b - move_tag = soup.f - remove_tag.replace_with(move_tag) - - self.assertEqual( - soup.decode(), self.document_for( - "Werefusetoservice")) - - # The tag is now an orphan. - self.assertEqual(remove_tag.parent, None) - self.assertEqual(remove_tag.find(text="right").next_element, None) - self.assertEqual(remove_tag.previous_element, None) - self.assertEqual(remove_tag.next_sibling, None) - self.assertEqual(remove_tag.previous_sibling, None) - - # The tag is now connected to the tag. - self.assertEqual(move_tag.parent, soup.a) - self.assertEqual(move_tag.previous_element, "We") - self.assertEqual(move_tag.next_element.next_element, soup.e) - self.assertEqual(move_tag.next_sibling, None) - - # The gap where the tag used to be has been mended, and - # the word "to" is now connected to the tag. - to_text = soup.find(text="to") - g_tag = soup.g - self.assertEqual(to_text.next_element, g_tag) - self.assertEqual(to_text.next_sibling, g_tag) - self.assertEqual(g_tag.previous_element, to_text) - self.assertEqual(g_tag.previous_sibling, to_text) - - def test_unwrap(self): - tree = self.soup(""" -

        Unneeded formatting is unneeded

        - """) - tree.em.unwrap() - self.assertEqual(tree.em, None) - self.assertEqual(tree.p.text, "Unneeded formatting is unneeded") - - def test_wrap(self): - soup = self.soup("I wish I was bold.") - value = soup.string.wrap(soup.new_tag("b")) - self.assertEqual(value.decode(), "I wish I was bold.") - self.assertEqual( - soup.decode(), self.document_for("I wish I was bold.")) - - def test_wrap_extracts_tag_from_elsewhere(self): - soup = self.soup("I wish I was bold.") - soup.b.next_sibling.wrap(soup.b) - self.assertEqual( - soup.decode(), self.document_for("I wish I was bold.")) - - def test_wrap_puts_new_contents_at_the_end(self): - soup = self.soup("I like being bold.I wish I was bold.") - soup.b.next_sibling.wrap(soup.b) - self.assertEqual(2, len(soup.b.contents)) - self.assertEqual( - soup.decode(), self.document_for( - "I like being bold.I wish I was bold.")) - - def test_extract(self): - soup = self.soup( - 'Some content. More content.') - - self.assertEqual(len(soup.body.contents), 3) - extracted = soup.find(id="nav").extract() - - self.assertEqual( - soup.decode(), "Some content. More content.") - self.assertEqual(extracted.decode(), '') - - # The extracted tag is now an orphan. - self.assertEqual(len(soup.body.contents), 2) - self.assertEqual(extracted.parent, None) - self.assertEqual(extracted.previous_element, None) - self.assertEqual(extracted.next_element.next_element, None) - - # The gap where the extracted tag used to be has been mended. - content_1 = soup.find(text="Some content. ") - content_2 = soup.find(text=" More content.") - self.assertEqual(content_1.next_element, content_2) - self.assertEqual(content_1.next_sibling, content_2) - self.assertEqual(content_2.previous_element, content_1) - self.assertEqual(content_2.previous_sibling, content_1) - - def test_extract_distinguishes_between_identical_strings(self): - soup = self.soup("
        foobar") - foo_1 = soup.a.string - bar_1 = soup.b.string - foo_2 = soup.new_string("foo") - bar_2 = soup.new_string("bar") - soup.a.append(foo_2) - soup.b.append(bar_2) - - # Now there are two identical strings in the tag, and two - # in the tag. Let's remove the first "foo" and the second - # "bar". - foo_1.extract() - bar_2.extract() - self.assertEqual(foo_2, soup.a.string) - self.assertEqual(bar_2, soup.b.string) - - def test_clear(self): - """Tag.clear()""" - soup = self.soup("

        String Italicized and another

        ") - # clear using extract() - a = soup.a - soup.p.clear() - self.assertEqual(len(soup.p.contents), 0) - self.assertTrue(hasattr(a, "contents")) - - # clear using decompose() - em = a.em - a.clear(decompose=True) - self.assertFalse(hasattr(em, "contents")) - - def test_string_set(self): - """Tag.string = 'string'""" - soup = self.soup(" ") - soup.a.string = "foo" - self.assertEqual(soup.a.contents, ["foo"]) - soup.b.string = "bar" - self.assertEqual(soup.b.contents, ["bar"]) - - def test_string_set_does_not_affect_original_string(self): - soup = self.soup("foobar") - soup.b.string = soup.c.string - self.assertEqual(soup.a.encode(), b"barbar") - - def test_set_string_preserves_class_of_string(self): - soup = self.soup("") - cdata = CData("foo") - soup.a.string = cdata - self.assertTrue(isinstance(soup.a.string, CData)) - -class TestElementObjects(SoupTest): - """Test various features of element objects.""" - - def test_len(self): - """The length of an element is its number of children.""" - soup = self.soup("123") - - # The BeautifulSoup object itself contains one element: the - # tag. - self.assertEqual(len(soup.contents), 1) - self.assertEqual(len(soup), 1) - - # The tag contains three elements: the text node "1", the - # tag, and the text node "3". - self.assertEqual(len(soup.top), 3) - self.assertEqual(len(soup.top.contents), 3) - - def test_member_access_invokes_find(self): - """Accessing a Python member .foo invokes find('foo')""" - soup = self.soup('') - self.assertEqual(soup.b, soup.find('b')) - self.assertEqual(soup.b.i, soup.find('b').find('i')) - self.assertEqual(soup.a, None) - - def test_deprecated_member_access(self): - soup = self.soup('') - with warnings.catch_warnings(record=True) as w: - tag = soup.bTag - self.assertEqual(soup.b, tag) - self.assertEqual( - '.bTag is deprecated, use .find("b") instead.', - str(w[0].message)) - - def test_has_attr(self): - """has_attr() checks for the presence of an attribute. - - Please note note: has_attr() is different from - __in__. has_attr() checks the tag's attributes and __in__ - checks the tag's chidlren. - """ - soup = self.soup("") - self.assertTrue(soup.foo.has_attr('attr')) - self.assertFalse(soup.foo.has_attr('attr2')) - - - def test_attributes_come_out_in_alphabetical_order(self): - markup = '' - self.assertSoupEquals(markup, '') - - def test_string(self): - # A tag that contains only a text node makes that node - # available as .string. - soup = self.soup("foo") - self.assertEqual(soup.b.string, 'foo') - - def test_empty_tag_has_no_string(self): - # A tag with no children has no .stirng. - soup = self.soup("") - self.assertEqual(soup.b.string, None) - - def test_tag_with_multiple_children_has_no_string(self): - # A tag with no children has no .string. - soup = self.soup("foo") - self.assertEqual(soup.b.string, None) - - soup = self.soup("foobar
        ") - self.assertEqual(soup.b.string, None) - - # Even if all the children are strings, due to trickery, - # it won't work--but this would be a good optimization. - soup = self.soup("foo
        ") - soup.a.insert(1, "bar") - self.assertEqual(soup.a.string, None) - - def test_tag_with_recursive_string_has_string(self): - # A tag with a single child which has a .string inherits that - # .string. - soup = self.soup("foo") - self.assertEqual(soup.a.string, "foo") - self.assertEqual(soup.string, "foo") - - def test_lack_of_string(self): - """Only a tag containing a single text node has a .string.""" - soup = self.soup("feo") - self.assertFalse(soup.b.string) - - soup = self.soup("") - self.assertFalse(soup.b.string) - - def test_all_text(self): - """Tag.text and Tag.get_text(sep=u"") -> all child text, concatenated""" - soup = self.soup("ar t ") - self.assertEqual(soup.a.text, "ar t ") - self.assertEqual(soup.a.get_text(strip=True), "art") - self.assertEqual(soup.a.get_text(","), "a,r, , t ") - self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t") - -class TestCDAtaListAttributes(SoupTest): - - """Testing cdata-list attributes like 'class'. - """ - def test_single_value_becomes_list(self): - soup = self.soup("") - self.assertEqual(["foo"],soup.a['class']) - - def test_multiple_values_becomes_list(self): - soup = self.soup("") - self.assertEqual(["foo", "bar"], soup.a['class']) - - def test_multiple_values_separated_by_weird_whitespace(self): - soup = self.soup("") - self.assertEqual(["foo", "bar", "baz"],soup.a['class']) - - def test_attributes_joined_into_string_on_output(self): - soup = self.soup("") - self.assertEqual(b'', soup.a.encode()) - - def test_accept_charset(self): - soup = self.soup('
        ') - self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset']) - - def test_cdata_attribute_applying_only_to_one_tag(self): - data = '' - soup = self.soup(data) - # We saw in another test that accept-charset is a cdata-list - # attribute for the tag. But it's not a cdata-list - # attribute for any other tag. - self.assertEqual('ISO-8859-1 UTF-8', soup.a['accept-charset']) - - -class TestPersistence(SoupTest): - "Testing features like pickle and deepcopy." - - def setUp(self): - super(TestPersistence, self).setUp() - self.page = """ - - - -Beautiful Soup: We called him Tortoise because he taught us. - - - - - - -foo -bar - -""" - self.tree = self.soup(self.page) - - def test_pickle_and_unpickle_identity(self): - # Pickling a tree, then unpickling it, yields a tree identical - # to the original. - dumped = pickle.dumps(self.tree, 2) - loaded = pickle.loads(dumped) - self.assertEqual(loaded.__class__, BeautifulSoup) - self.assertEqual(loaded.decode(), self.tree.decode()) - - def test_deepcopy_identity(self): - # Making a deepcopy of a tree yields an identical tree. - copied = copy.deepcopy(self.tree) - self.assertEqual(copied.decode(), self.tree.decode()) - - def test_unicode_pickle(self): - # A tree containing Unicode characters can be pickled. - html = u"\N{SNOWMAN}" - soup = self.soup(html) - dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) - loaded = pickle.loads(dumped) - self.assertEqual(loaded.decode(), soup.decode()) - - -class TestSubstitutions(SoupTest): - - def test_default_formatter_is_minimal(self): - markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="minimal") - # The < is converted back into < but the e-with-acute is left alone. - self.assertEqual( - decoded, - self.document_for( - u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) - - def test_formatter_html(self): - markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="html") - self.assertEqual( - decoded, - self.document_for("<<Sacré bleu!>>")) - - def test_formatter_minimal(self): - markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter="minimal") - # The < is converted back into < but the e-with-acute is left alone. - self.assertEqual( - decoded, - self.document_for( - u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>")) - - def test_formatter_null(self): - markup = u"<<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>>" - soup = self.soup(markup) - decoded = soup.decode(formatter=None) - # Neither the angle brackets nor the e-with-acute are converted. - # This is not valid HTML, but it's what the user wanted. - self.assertEqual(decoded, - self.document_for(u"<>")) - - def test_formatter_custom(self): - markup = u"<foo>bar" - soup = self.soup(markup) - decoded = soup.decode(formatter = lambda x: x.upper()) - # Instead of normal entity conversion code, the custom - # callable is called on every string. - self.assertEqual( - decoded, - self.document_for(u"BAR")) - - def test_formatter_is_run_on_attribute_values(self): - markup = u'e' - soup = self.soup(markup) - a = soup.a - - expect_minimal = u'e' - - self.assertEqual(expect_minimal, a.decode()) - self.assertEqual(expect_minimal, a.decode(formatter="minimal")) - - expect_html = u'e' - self.assertEqual(expect_html, a.decode(formatter="html")) - - self.assertEqual(markup, a.decode(formatter=None)) - expect_upper = u'E' - self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper())) - - def test_prettify_accepts_formatter(self): - soup = BeautifulSoup("foo") - pretty = soup.prettify(formatter = lambda x: x.upper()) - self.assertTrue("FOO" in pretty) - - def test_prettify_outputs_unicode_by_default(self): - soup = self.soup("") - self.assertEqual(unicode, type(soup.prettify())) - - def test_prettify_can_encode_data(self): - soup = self.soup("") - self.assertEqual(bytes, type(soup.prettify("utf-8"))) - - def test_html_entity_substitution_off_by_default(self): - markup = u"Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!" - soup = self.soup(markup) - encoded = soup.b.encode("utf-8") - self.assertEqual(encoded, markup.encode('utf-8')) - - def test_encoding_substitution(self): - # Here's the tag saying that a document is - # encoded in Shift-JIS. - meta_tag = ('') - soup = self.soup(meta_tag) - - # Parse the document, and the charset apprears unchanged. - self.assertEqual(soup.meta['content'], 'text/html; charset=x-sjis') - - # Encode the document into some encoding, and the encoding is - # substituted into the meta tag. - utf_8 = soup.encode("utf-8") - self.assertTrue(b"charset=utf-8" in utf_8) - - euc_jp = soup.encode("euc_jp") - self.assertTrue(b"charset=euc_jp" in euc_jp) - - shift_jis = soup.encode("shift-jis") - self.assertTrue(b"charset=shift-jis" in shift_jis) - - utf_16_u = soup.encode("utf-16").decode("utf-16") - self.assertTrue("charset=utf-16" in utf_16_u) - - def test_encoding_substitution_doesnt_happen_if_tag_is_strained(self): - markup = ('
        foo
        ') - - # Beautiful Soup used to try to rewrite the meta tag even if the - # meta tag got filtered out by the strainer. This test makes - # sure that doesn't happen. - strainer = SoupStrainer('pre') - soup = self.soup(markup, parse_only=strainer) - self.assertEqual(soup.contents[0].name, 'pre') - -class TestEncoding(SoupTest): - """Test the ability to encode objects into strings.""" - - def test_unicode_string_can_be_encoded(self): - html = u"\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual(soup.b.string.encode("utf-8"), - u"\N{SNOWMAN}".encode("utf-8")) - - def test_tag_containing_unicode_string_can_be_encoded(self): - html = u"\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual( - soup.b.encode("utf-8"), html.encode("utf-8")) - - def test_encoding_substitutes_unrecognized_characters_by_default(self): - html = u"\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual(soup.b.encode("ascii"), b"") - - def test_encoding_can_be_made_strict(self): - html = u"\N{SNOWMAN}" - soup = self.soup(html) - self.assertRaises( - UnicodeEncodeError, soup.encode, "ascii", errors="strict") - - def test_decode_contents(self): - html = u"\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents()) - - def test_encode_contents(self): - html = u"\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual( - u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents( - encoding="utf8")) - - def test_deprecated_renderContents(self): - html = u"\N{SNOWMAN}" - soup = self.soup(html) - self.assertEqual( - u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents()) - -class TestNavigableStringSubclasses(SoupTest): - - def test_cdata(self): - # None of the current builders turn CDATA sections into CData - # objects, but you can create them manually. - soup = self.soup("") - cdata = CData("foo") - soup.insert(1, cdata) - self.assertEqual(str(soup), "") - self.assertEqual(soup.find(text="foo"), "foo") - self.assertEqual(soup.contents[0], "foo") - - def test_cdata_is_never_formatted(self): - """Text inside a CData object is passed into the formatter. - - But the return value is ignored. - """ - - self.count = 0 - def increment(*args): - self.count += 1 - return "BITTER FAILURE" - - soup = self.soup("") - cdata = CData("<><><>") - soup.insert(1, cdata) - self.assertEqual( - b"<><>]]>", soup.encode(formatter=increment)) - self.assertEqual(1, self.count) - - def test_doctype_ends_in_newline(self): - # Unlike other NavigableString subclasses, a DOCTYPE always ends - # in a newline. - doctype = Doctype("foo") - soup = self.soup("") - soup.insert(1, doctype) - self.assertEqual(soup.encode(), b"\n") - - -class TestSoupSelector(TreeTest): - - HTML = """ - - - -The title - - - - -
        -
        -

        An H1

        -

        Some text

        -

        Some more text

        -

        An H2

        -

        Another

        -Bob -

        Another H2

        -me - -span1a1 -span1a2 test - -span2a1 - - - -
        -

        English

        -

        English UK

        -

        English US

        -

        French

        -
        - - -""" - - def setUp(self): - self.soup = BeautifulSoup(self.HTML) - - def assertSelects(self, selector, expected_ids): - el_ids = [el['id'] for el in self.soup.select(selector)] - el_ids.sort() - expected_ids.sort() - self.assertEqual(expected_ids, el_ids, - "Selector %s, expected [%s], got [%s]" % ( - selector, ', '.join(expected_ids), ', '.join(el_ids) - ) - ) - - assertSelect = assertSelects - - def assertSelectMultiple(self, *tests): - for selector, expected_ids in tests: - self.assertSelect(selector, expected_ids) - - def test_one_tag_one(self): - els = self.soup.select('title') - self.assertEqual(len(els), 1) - self.assertEqual(els[0].name, 'title') - self.assertEqual(els[0].contents, [u'The title']) - - def test_one_tag_many(self): - els = self.soup.select('div') - self.assertEqual(len(els), 3) - for div in els: - self.assertEqual(div.name, 'div') - - def test_tag_in_tag_one(self): - els = self.soup.select('div div') - self.assertSelects('div div', ['inner']) - - def test_tag_in_tag_many(self): - for selector in ('html div', 'html body div', 'body div'): - self.assertSelects(selector, ['main', 'inner', 'footer']) - - def test_tag_no_match(self): - self.assertEqual(len(self.soup.select('del')), 0) - - def test_invalid_tag(self): - self.assertEqual(len(self.soup.select('tag%t')), 0) - - def test_header_tags(self): - self.assertSelectMultiple( - ('h1', ['header1']), - ('h2', ['header2', 'header3']), - ) - - def test_class_one(self): - for selector in ('.onep', 'p.onep', 'html p.onep'): - els = self.soup.select(selector) - self.assertEqual(len(els), 1) - self.assertEqual(els[0].name, 'p') - self.assertEqual(els[0]['class'], ['onep']) - - def test_class_mismatched_tag(self): - els = self.soup.select('div.onep') - self.assertEqual(len(els), 0) - - def test_one_id(self): - for selector in ('div#inner', '#inner', 'div div#inner'): - self.assertSelects(selector, ['inner']) - - def test_bad_id(self): - els = self.soup.select('#doesnotexist') - self.assertEqual(len(els), 0) - - def test_items_in_id(self): - els = self.soup.select('div#inner p') - self.assertEqual(len(els), 3) - for el in els: - self.assertEqual(el.name, 'p') - self.assertEqual(els[1]['class'], ['onep']) - self.assertFalse(els[0].has_key('class')) - - def test_a_bunch_of_emptys(self): - for selector in ('div#main del', 'div#main div.oops', 'div div#main'): - self.assertEqual(len(self.soup.select(selector)), 0) - - def test_multi_class_support(self): - for selector in ('.class1', 'p.class1', '.class2', 'p.class2', - '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'): - self.assertSelects(selector, ['pmulti']) - - def test_multi_class_selection(self): - for selector in ('.class1.class3', '.class3.class2', - '.class1.class2.class3'): - self.assertSelects(selector, ['pmulti']) - - def test_child_selector(self): - self.assertSelects('.s1 > a', ['s1a1', 's1a2']) - self.assertSelects('.s1 > a span', ['s1a2s1']) - - def test_attribute_equals(self): - self.assertSelectMultiple( - ('p[class="onep"]', ['p1']), - ('p[id="p1"]', ['p1']), - ('[class="onep"]', ['p1']), - ('[id="p1"]', ['p1']), - ('link[rel="stylesheet"]', ['l1']), - ('link[type="text/css"]', ['l1']), - ('link[href="blah.css"]', ['l1']), - ('link[href="no-blah.css"]', []), - ('[rel="stylesheet"]', ['l1']), - ('[type="text/css"]', ['l1']), - ('[href="blah.css"]', ['l1']), - ('[href="no-blah.css"]', []), - ('p[href="no-blah.css"]', []), - ('[href="no-blah.css"]', []), - ) - - def test_attribute_tilde(self): - self.assertSelectMultiple( - ('p[class~="class1"]', ['pmulti']), - ('p[class~="class2"]', ['pmulti']), - ('p[class~="class3"]', ['pmulti']), - ('[class~="class1"]', ['pmulti']), - ('[class~="class2"]', ['pmulti']), - ('[class~="class3"]', ['pmulti']), - ('a[rel~="friend"]', ['bob']), - ('a[rel~="met"]', ['bob']), - ('[rel~="friend"]', ['bob']), - ('[rel~="met"]', ['bob']), - ) - - def test_attribute_startswith(self): - self.assertSelectMultiple( - ('[rel^="style"]', ['l1']), - ('link[rel^="style"]', ['l1']), - ('notlink[rel^="notstyle"]', []), - ('[rel^="notstyle"]', []), - ('link[rel^="notstyle"]', []), - ('link[href^="bla"]', ['l1']), - ('a[href^="http://"]', ['bob', 'me']), - ('[href^="http://"]', ['bob', 'me']), - ('[id^="p"]', ['pmulti', 'p1']), - ('[id^="m"]', ['me', 'main']), - ('div[id^="m"]', ['main']), - ('a[id^="m"]', ['me']), - ) - - def test_attribute_endswith(self): - self.assertSelectMultiple( - ('[href$=".css"]', ['l1']), - ('link[href$=".css"]', ['l1']), - ('link[id$="1"]', ['l1']), - ('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1']), - ('div[id$="1"]', []), - ('[id$="noending"]', []), - ) - - def test_attribute_contains(self): - self.assertSelectMultiple( - # From test_attribute_startswith - ('[rel*="style"]', ['l1']), - ('link[rel*="style"]', ['l1']), - ('notlink[rel*="notstyle"]', []), - ('[rel*="notstyle"]', []), - ('link[rel*="notstyle"]', []), - ('link[href*="bla"]', ['l1']), - ('a[href*="http://"]', ['bob', 'me']), - ('[href*="http://"]', ['bob', 'me']), - ('[id*="p"]', ['pmulti', 'p1']), - ('div[id*="m"]', ['main']), - ('a[id*="m"]', ['me']), - # From test_attribute_endswith - ('[href*=".css"]', ['l1']), - ('link[href*=".css"]', ['l1']), - ('link[id*="1"]', ['l1']), - ('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1']), - ('div[id*="1"]', []), - ('[id*="noending"]', []), - # New for this test - ('[href*="."]', ['bob', 'me', 'l1']), - ('a[href*="."]', ['bob', 'me']), - ('link[href*="."]', ['l1']), - ('div[id*="n"]', ['main', 'inner']), - ('div[id*="nn"]', ['inner']), - ) - - def test_attribute_exact_or_hypen(self): - self.assertSelectMultiple( - ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']), - ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']), - ('p[lang|="fr"]', ['lang-fr']), - ('p[lang|="gb"]', []), - ) - - def test_attribute_exists(self): - self.assertSelectMultiple( - ('[rel]', ['l1', 'bob', 'me']), - ('link[rel]', ['l1']), - ('a[rel]', ['bob', 'me']), - ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']), - ('p[class]', ['p1', 'pmulti']), - ('[blah]', []), - ('p[blah]', []), - ) - - def test_select_on_element(self): - # Other tests operate on the tree; this operates on an element - # within the tree. - inner = self.soup.find("div", id="main") - selected = inner.select("div") - # The
        tag was selected. The % elif headphones.CURRENT_VERSION != headphones.LATEST_VERSION and headphones.INSTALL_TYPE != 'win':
        - A newer version is available. You're ${headphones.COMMITS_BEHIND} commits behind. Update or Close + A newer version is available. You're ${headphones.COMMITS_BEHIND} commits behind. Update or Close
        % endif diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html index d599471b..bc50b794 100644 --- a/data/interfaces/default/config.html +++ b/data/interfaces/default/config.html @@ -312,7 +312,7 @@ m<%inherit file="base.html"/>
        - +
        diff --git a/headphones/versioncheck.py b/headphones/versioncheck.py index 429ec8b9..e1440fd8 100644 --- a/headphones/versioncheck.py +++ b/headphones/versioncheck.py @@ -20,7 +20,7 @@ from headphones import logger, version import lib.simplejson as simplejson -user = "rembo10" +user = "AdeHub" branch = "master" def runGit(args): diff --git a/html5lib/__init__.py b/html5lib/__init__.py new file mode 100644 index 00000000..16537aad --- /dev/null +++ b/html5lib/__init__.py @@ -0,0 +1,17 @@ +""" +HTML parsing library based on the WHATWG "HTML5" +specification. The parser is designed to be compatible with existing +HTML found in the wild and implements well-defined error recovery that +is largely compatible with modern desktop web browsers. + +Example usage: + +import html5lib +f = open("my_document.html") +tree = html5lib.parse(f) +""" +__version__ = "0.95-dev" +from html5parser import HTMLParser, parse, parseFragment +from treebuilders import getTreeBuilder +from treewalkers import getTreeWalker +from serializer import serialize diff --git a/html5lib/constants.py b/html5lib/constants.py new file mode 100644 index 00000000..b533018e --- /dev/null +++ b/html5lib/constants.py @@ -0,0 +1,3085 @@ +import string, gettext +_ = gettext.gettext + +try: + frozenset +except NameError: + # Import from the sets module for python 2.3 + from sets import Set as set + from sets import ImmutableSet as frozenset + +EOF = None + +E = { + "null-character": + _(u"Null character in input stream, replaced with U+FFFD."), + "invalid-codepoint": + _(u"Invalid codepoint in stream."), + "incorrectly-placed-solidus": + _(u"Solidus (/) incorrectly placed in tag."), + "incorrect-cr-newline-entity": + _(u"Incorrect CR newline entity, replaced with LF."), + "illegal-windows-1252-entity": + _(u"Entity used with illegal number (windows-1252 reference)."), + "cant-convert-numeric-entity": + _(u"Numeric entity couldn't be converted to character " + u"(codepoint U+%(charAsInt)08x)."), + "illegal-codepoint-for-numeric-entity": + _(u"Numeric entity represents an illegal codepoint: " + u"U+%(charAsInt)08x."), + "numeric-entity-without-semicolon": + _(u"Numeric entity didn't end with ';'."), + "expected-numeric-entity-but-got-eof": + _(u"Numeric entity expected. Got end of file instead."), + "expected-numeric-entity": + _(u"Numeric entity expected but none found."), + "named-entity-without-semicolon": + _(u"Named entity didn't end with ';'."), + "expected-named-entity": + _(u"Named entity expected. Got none."), + "attributes-in-end-tag": + _(u"End tag contains unexpected attributes."), + 'self-closing-flag-on-end-tag': + _(u"End tag contains unexpected self-closing flag."), + "expected-tag-name-but-got-right-bracket": + _(u"Expected tag name. Got '>' instead."), + "expected-tag-name-but-got-question-mark": + _(u"Expected tag name. Got '?' instead. (HTML doesn't " + u"support processing instructions.)"), + "expected-tag-name": + _(u"Expected tag name. Got something else instead"), + "expected-closing-tag-but-got-right-bracket": + _(u"Expected closing tag. Got '>' instead. Ignoring ''."), + "expected-closing-tag-but-got-eof": + _(u"Expected closing tag. Unexpected end of file."), + "expected-closing-tag-but-got-char": + _(u"Expected closing tag. Unexpected character '%(data)s' found."), + "eof-in-tag-name": + _(u"Unexpected end of file in the tag name."), + "expected-attribute-name-but-got-eof": + _(u"Unexpected end of file. Expected attribute name instead."), + "eof-in-attribute-name": + _(u"Unexpected end of file in attribute name."), + "invalid-character-in-attribute-name": + _(u"Invalid chracter in attribute name"), + "duplicate-attribute": + _(u"Dropped duplicate attribute on tag."), + "expected-end-of-tag-name-but-got-eof": + _(u"Unexpected end of file. Expected = or end of tag."), + "expected-attribute-value-but-got-eof": + _(u"Unexpected end of file. Expected attribute value."), + "expected-attribute-value-but-got-right-bracket": + _(u"Expected attribute value. Got '>' instead."), + 'equals-in-unquoted-attribute-value': + _(u"Unexpected = in unquoted attribute"), + 'unexpected-character-in-unquoted-attribute-value': + _(u"Unexpected character in unquoted attribute"), + "invalid-character-after-attribute-name": + _(u"Unexpected character after attribute name."), + "unexpected-character-after-attribute-value": + _(u"Unexpected character after attribute value."), + "eof-in-attribute-value-double-quote": + _(u"Unexpected end of file in attribute value (\")."), + "eof-in-attribute-value-single-quote": + _(u"Unexpected end of file in attribute value (')."), + "eof-in-attribute-value-no-quotes": + _(u"Unexpected end of file in attribute value."), + "unexpected-EOF-after-solidus-in-tag": + _(u"Unexpected end of file in tag. Expected >"), + "unexpected-character-after-soldius-in-tag": + _(u"Unexpected character after / in tag. Expected >"), + "expected-dashes-or-doctype": + _(u"Expected '--' or 'DOCTYPE'. Not found."), + "unexpected-bang-after-double-dash-in-comment": + _(u"Unexpected ! after -- in comment"), + "unexpected-space-after-double-dash-in-comment": + _(u"Unexpected space after -- in comment"), + "incorrect-comment": + _(u"Incorrect comment."), + "eof-in-comment": + _(u"Unexpected end of file in comment."), + "eof-in-comment-end-dash": + _(u"Unexpected end of file in comment (-)"), + "unexpected-dash-after-double-dash-in-comment": + _(u"Unexpected '-' after '--' found in comment."), + "eof-in-comment-double-dash": + _(u"Unexpected end of file in comment (--)."), + "eof-in-comment-end-space-state": + _(u"Unexpected end of file in comment."), + "eof-in-comment-end-bang-state": + _(u"Unexpected end of file in comment."), + "unexpected-char-in-comment": + _(u"Unexpected character in comment found."), + "need-space-after-doctype": + _(u"No space after literal string 'DOCTYPE'."), + "expected-doctype-name-but-got-right-bracket": + _(u"Unexpected > character. Expected DOCTYPE name."), + "expected-doctype-name-but-got-eof": + _(u"Unexpected end of file. Expected DOCTYPE name."), + "eof-in-doctype-name": + _(u"Unexpected end of file in DOCTYPE name."), + "eof-in-doctype": + _(u"Unexpected end of file in DOCTYPE."), + "expected-space-or-right-bracket-in-doctype": + _(u"Expected space or '>'. Got '%(data)s'"), + "unexpected-end-of-doctype": + _(u"Unexpected end of DOCTYPE."), + "unexpected-char-in-doctype": + _(u"Unexpected character in DOCTYPE."), + "eof-in-innerhtml": + _(u"XXX innerHTML EOF"), + "unexpected-doctype": + _(u"Unexpected DOCTYPE. Ignored."), + "non-html-root": + _(u"html needs to be the first start tag."), + "expected-doctype-but-got-eof": + _(u"Unexpected End of file. Expected DOCTYPE."), + "unknown-doctype": + _(u"Erroneous DOCTYPE."), + "expected-doctype-but-got-chars": + _(u"Unexpected non-space characters. Expected DOCTYPE."), + "expected-doctype-but-got-start-tag": + _(u"Unexpected start tag (%(name)s). Expected DOCTYPE."), + "expected-doctype-but-got-end-tag": + _(u"Unexpected end tag (%(name)s). Expected DOCTYPE."), + "end-tag-after-implied-root": + _(u"Unexpected end tag (%(name)s) after the (implied) root element."), + "expected-named-closing-tag-but-got-eof": + _(u"Unexpected end of file. Expected end tag (%(name)s)."), + "two-heads-are-not-better-than-one": + _(u"Unexpected start tag head in existing head. Ignored."), + "unexpected-end-tag": + _(u"Unexpected end tag (%(name)s). Ignored."), + "unexpected-start-tag-out-of-my-head": + _(u"Unexpected start tag (%(name)s) that can be in head. Moved."), + "unexpected-start-tag": + _(u"Unexpected start tag (%(name)s)."), + "missing-end-tag": + _(u"Missing end tag (%(name)s)."), + "missing-end-tags": + _(u"Missing end tags (%(name)s)."), + "unexpected-start-tag-implies-end-tag": + _(u"Unexpected start tag (%(startName)s) " + u"implies end tag (%(endName)s)."), + "unexpected-start-tag-treated-as": + _(u"Unexpected start tag (%(originalName)s). Treated as %(newName)s."), + "deprecated-tag": + _(u"Unexpected start tag %(name)s. Don't use it!"), + "unexpected-start-tag-ignored": + _(u"Unexpected start tag %(name)s. Ignored."), + "expected-one-end-tag-but-got-another": + _(u"Unexpected end tag (%(gotName)s). " + u"Missing end tag (%(expectedName)s)."), + "end-tag-too-early": + _(u"End tag (%(name)s) seen too early. Expected other end tag."), + "end-tag-too-early-named": + _(u"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."), + "end-tag-too-early-ignored": + _(u"End tag (%(name)s) seen too early. Ignored."), + "adoption-agency-1.1": + _(u"End tag (%(name)s) violates step 1, " + u"paragraph 1 of the adoption agency algorithm."), + "adoption-agency-1.2": + _(u"End tag (%(name)s) violates step 1, " + u"paragraph 2 of the adoption agency algorithm."), + "adoption-agency-1.3": + _(u"End tag (%(name)s) violates step 1, " + u"paragraph 3 of the adoption agency algorithm."), + "unexpected-end-tag-treated-as": + _(u"Unexpected end tag (%(originalName)s). Treated as %(newName)s."), + "no-end-tag": + _(u"This element (%(name)s) has no end tag."), + "unexpected-implied-end-tag-in-table": + _(u"Unexpected implied end tag (%(name)s) in the table phase."), + "unexpected-implied-end-tag-in-table-body": + _(u"Unexpected implied end tag (%(name)s) in the table body phase."), + "unexpected-char-implies-table-voodoo": + _(u"Unexpected non-space characters in " + u"table context caused voodoo mode."), + "unexpected-hidden-input-in-table": + _(u"Unexpected input with type hidden in table context."), + "unexpected-form-in-table": + _(u"Unexpected form in table context."), + "unexpected-start-tag-implies-table-voodoo": + _(u"Unexpected start tag (%(name)s) in " + u"table context caused voodoo mode."), + "unexpected-end-tag-implies-table-voodoo": + _(u"Unexpected end tag (%(name)s) in " + u"table context caused voodoo mode."), + "unexpected-cell-in-table-body": + _(u"Unexpected table cell start tag (%(name)s) " + u"in the table body phase."), + "unexpected-cell-end-tag": + _(u"Got table cell end tag (%(name)s) " + u"while required end tags are missing."), + "unexpected-end-tag-in-table-body": + _(u"Unexpected end tag (%(name)s) in the table body phase. Ignored."), + "unexpected-implied-end-tag-in-table-row": + _(u"Unexpected implied end tag (%(name)s) in the table row phase."), + "unexpected-end-tag-in-table-row": + _(u"Unexpected end tag (%(name)s) in the table row phase. Ignored."), + "unexpected-select-in-select": + _(u"Unexpected select start tag in the select phase " + u"treated as select end tag."), + "unexpected-input-in-select": + _(u"Unexpected input start tag in the select phase."), + "unexpected-start-tag-in-select": + _(u"Unexpected start tag token (%(name)s in the select phase. " + u"Ignored."), + "unexpected-end-tag-in-select": + _(u"Unexpected end tag (%(name)s) in the select phase. Ignored."), + "unexpected-table-element-start-tag-in-select-in-table": + _(u"Unexpected table element start tag (%(name)s) in the select in table phase."), + "unexpected-table-element-end-tag-in-select-in-table": + _(u"Unexpected table element end tag (%(name)s) in the select in table phase."), + "unexpected-char-after-body": + _(u"Unexpected non-space characters in the after body phase."), + "unexpected-start-tag-after-body": + _(u"Unexpected start tag token (%(name)s)" + u" in the after body phase."), + "unexpected-end-tag-after-body": + _(u"Unexpected end tag token (%(name)s)" + u" in the after body phase."), + "unexpected-char-in-frameset": + _(u"Unepxected characters in the frameset phase. Characters ignored."), + "unexpected-start-tag-in-frameset": + _(u"Unexpected start tag token (%(name)s)" + u" in the frameset phase. Ignored."), + "unexpected-frameset-in-frameset-innerhtml": + _(u"Unexpected end tag token (frameset) " + u"in the frameset phase (innerHTML)."), + "unexpected-end-tag-in-frameset": + _(u"Unexpected end tag token (%(name)s)" + u" in the frameset phase. Ignored."), + "unexpected-char-after-frameset": + _(u"Unexpected non-space characters in the " + u"after frameset phase. Ignored."), + "unexpected-start-tag-after-frameset": + _(u"Unexpected start tag (%(name)s)" + u" in the after frameset phase. Ignored."), + "unexpected-end-tag-after-frameset": + _(u"Unexpected end tag (%(name)s)" + u" in the after frameset phase. Ignored."), + "unexpected-end-tag-after-body-innerhtml": + _(u"Unexpected end tag after body(innerHtml)"), + "expected-eof-but-got-char": + _(u"Unexpected non-space characters. Expected end of file."), + "expected-eof-but-got-start-tag": + _(u"Unexpected start tag (%(name)s)" + u". Expected end of file."), + "expected-eof-but-got-end-tag": + _(u"Unexpected end tag (%(name)s)" + u". Expected end of file."), + "eof-in-table": + _(u"Unexpected end of file. Expected table content."), + "eof-in-select": + _(u"Unexpected end of file. Expected select content."), + "eof-in-frameset": + _(u"Unexpected end of file. Expected frameset content."), + "eof-in-script-in-script": + _(u"Unexpected end of file. Expected script content."), + "eof-in-foreign-lands": + _(u"Unexpected end of file. Expected foreign content"), + "non-void-element-with-trailing-solidus": + _(u"Trailing solidus not allowed on element %(name)s"), + "unexpected-html-element-in-foreign-content": + _(u"Element %(name)s not allowed in a non-html context"), + "unexpected-end-tag-before-html": + _(u"Unexpected end tag (%(name)s) before html."), + "XXX-undefined-error": + (u"Undefined error (this sucks and should be fixed)"), +} + +namespaces = { + "html":"http://www.w3.org/1999/xhtml", + "mathml":"http://www.w3.org/1998/Math/MathML", + "svg":"http://www.w3.org/2000/svg", + "xlink":"http://www.w3.org/1999/xlink", + "xml":"http://www.w3.org/XML/1998/namespace", + "xmlns":"http://www.w3.org/2000/xmlns/" +} + +scopingElements = frozenset(( + (namespaces["html"], "applet"), + (namespaces["html"], "caption"), + (namespaces["html"], "html"), + (namespaces["html"], "marquee"), + (namespaces["html"], "object"), + (namespaces["html"], "table"), + (namespaces["html"], "td"), + (namespaces["html"], "th"), + (namespaces["mathml"], "mi"), + (namespaces["mathml"], "mo"), + (namespaces["mathml"], "mn"), + (namespaces["mathml"], "ms"), + (namespaces["mathml"], "mtext"), + (namespaces["mathml"], "annotation-xml"), + (namespaces["svg"], "foreignObject"), + (namespaces["svg"], "desc"), + (namespaces["svg"], "title"), +)) + +formattingElements = frozenset(( + (namespaces["html"], "a"), + (namespaces["html"], "b"), + (namespaces["html"], "big"), + (namespaces["html"], "code"), + (namespaces["html"], "em"), + (namespaces["html"], "font"), + (namespaces["html"], "i"), + (namespaces["html"], "nobr"), + (namespaces["html"], "s"), + (namespaces["html"], "small"), + (namespaces["html"], "strike"), + (namespaces["html"], "strong"), + (namespaces["html"], "tt"), + (namespaces["html"], "u") +)) + +specialElements = frozenset(( + (namespaces["html"], "address"), + (namespaces["html"], "applet"), + (namespaces["html"], "area"), + (namespaces["html"], "article"), + (namespaces["html"], "aside"), + (namespaces["html"], "base"), + (namespaces["html"], "basefont"), + (namespaces["html"], "bgsound"), + (namespaces["html"], "blockquote"), + (namespaces["html"], "body"), + (namespaces["html"], "br"), + (namespaces["html"], "button"), + (namespaces["html"], "caption"), + (namespaces["html"], "center"), + (namespaces["html"], "col"), + (namespaces["html"], "colgroup"), + (namespaces["html"], "command"), + (namespaces["html"], "dd"), + (namespaces["html"], "details"), + (namespaces["html"], "dir"), + (namespaces["html"], "div"), + (namespaces["html"], "dl"), + (namespaces["html"], "dt"), + (namespaces["html"], "embed"), + (namespaces["html"], "fieldset"), + (namespaces["html"], "figure"), + (namespaces["html"], "footer"), + (namespaces["html"], "form"), + (namespaces["html"], "frame"), + (namespaces["html"], "frameset"), + (namespaces["html"], "h1"), + (namespaces["html"], "h2"), + (namespaces["html"], "h3"), + (namespaces["html"], "h4"), + (namespaces["html"], "h5"), + (namespaces["html"], "h6"), + (namespaces["html"], "head"), + (namespaces["html"], "header"), + (namespaces["html"], "hr"), + (namespaces["html"], "html"), + (namespaces["html"], "iframe"), + # Note that image is commented out in the spec as "this isn't an + # element that can end up on the stack, so it doesn't matter," + (namespaces["html"], "image"), + (namespaces["html"], "img"), + (namespaces["html"], "input"), + (namespaces["html"], "isindex"), + (namespaces["html"], "li"), + (namespaces["html"], "link"), + (namespaces["html"], "listing"), + (namespaces["html"], "marquee"), + (namespaces["html"], "menu"), + (namespaces["html"], "meta"), + (namespaces["html"], "nav"), + (namespaces["html"], "noembed"), + (namespaces["html"], "noframes"), + (namespaces["html"], "noscript"), + (namespaces["html"], "object"), + (namespaces["html"], "ol"), + (namespaces["html"], "p"), + (namespaces["html"], "param"), + (namespaces["html"], "plaintext"), + (namespaces["html"], "pre"), + (namespaces["html"], "script"), + (namespaces["html"], "section"), + (namespaces["html"], "select"), + (namespaces["html"], "style"), + (namespaces["html"], "table"), + (namespaces["html"], "tbody"), + (namespaces["html"], "td"), + (namespaces["html"], "textarea"), + (namespaces["html"], "tfoot"), + (namespaces["html"], "th"), + (namespaces["html"], "thead"), + (namespaces["html"], "title"), + (namespaces["html"], "tr"), + (namespaces["html"], "ul"), + (namespaces["html"], "wbr"), + (namespaces["html"], "xmp"), + (namespaces["svg"], "foreignObject") +)) + +htmlIntegrationPointElements = frozenset(( + (namespaces["mathml"], "annotaion-xml"), + (namespaces["svg"], "foreignObject"), + (namespaces["svg"], "desc"), + (namespaces["svg"], "title") +)) + +mathmlTextIntegrationPointElements = frozenset(( + (namespaces["mathml"], "mi"), + (namespaces["mathml"], "mo"), + (namespaces["mathml"], "mn"), + (namespaces["mathml"], "ms"), + (namespaces["mathml"], "mtext") +)) + +spaceCharacters = frozenset(( + u"\t", + u"\n", + u"\u000C", + u" ", + u"\r" +)) + +tableInsertModeElements = frozenset(( + "table", + "tbody", + "tfoot", + "thead", + "tr" +)) + +asciiLowercase = frozenset(string.ascii_lowercase) +asciiUppercase = frozenset(string.ascii_uppercase) +asciiLetters = frozenset(string.ascii_letters) +digits = frozenset(string.digits) +hexDigits = frozenset(string.hexdigits) + +asciiUpper2Lower = dict([(ord(c),ord(c.lower())) + for c in string.ascii_uppercase]) + +# Heading elements need to be ordered +headingElements = ( + "h1", + "h2", + "h3", + "h4", + "h5", + "h6" +) + +voidElements = frozenset(( + "base", + "command", + "event-source", + "link", + "meta", + "hr", + "br", + "img", + "embed", + "param", + "area", + "col", + "input", + "source", + "track" +)) + +cdataElements = frozenset(('title', 'textarea')) + +rcdataElements = frozenset(( + 'style', + 'script', + 'xmp', + 'iframe', + 'noembed', + 'noframes', + 'noscript' +)) + +booleanAttributes = { + "": frozenset(("irrelevant",)), + "style": frozenset(("scoped",)), + "img": frozenset(("ismap",)), + "audio": frozenset(("autoplay","controls")), + "video": frozenset(("autoplay","controls")), + "script": frozenset(("defer", "async")), + "details": frozenset(("open",)), + "datagrid": frozenset(("multiple", "disabled")), + "command": frozenset(("hidden", "disabled", "checked", "default")), + "hr": frozenset(("noshade")), + "menu": frozenset(("autosubmit",)), + "fieldset": frozenset(("disabled", "readonly")), + "option": frozenset(("disabled", "readonly", "selected")), + "optgroup": frozenset(("disabled", "readonly")), + "button": frozenset(("disabled", "autofocus")), + "input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")), + "select": frozenset(("disabled", "readonly", "autofocus", "multiple")), + "output": frozenset(("disabled", "readonly")), +} + +# entitiesWindows1252 has to be _ordered_ and needs to have an index. It +# therefore can't be a frozenset. +entitiesWindows1252 = ( + 8364, # 0x80 0x20AC EURO SIGN + 65533, # 0x81 UNDEFINED + 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK + 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK + 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK + 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS + 8224, # 0x86 0x2020 DAGGER + 8225, # 0x87 0x2021 DOUBLE DAGGER + 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT + 8240, # 0x89 0x2030 PER MILLE SIGN + 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON + 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK + 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE + 65533, # 0x8D UNDEFINED + 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON + 65533, # 0x8F UNDEFINED + 65533, # 0x90 UNDEFINED + 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK + 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK + 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK + 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK + 8226, # 0x95 0x2022 BULLET + 8211, # 0x96 0x2013 EN DASH + 8212, # 0x97 0x2014 EM DASH + 732, # 0x98 0x02DC SMALL TILDE + 8482, # 0x99 0x2122 TRADE MARK SIGN + 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON + 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE + 65533, # 0x9D UNDEFINED + 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON + 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS +) + +xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;')) + +entities = { + "AElig": u"\xc6", + "AElig;": u"\xc6", + "AMP": u"&", + "AMP;": u"&", + "Aacute": u"\xc1", + "Aacute;": u"\xc1", + "Abreve;": u"\u0102", + "Acirc": u"\xc2", + "Acirc;": u"\xc2", + "Acy;": u"\u0410", + "Afr;": u"\U0001d504", + "Agrave": u"\xc0", + "Agrave;": u"\xc0", + "Alpha;": u"\u0391", + "Amacr;": u"\u0100", + "And;": u"\u2a53", + "Aogon;": u"\u0104", + "Aopf;": u"\U0001d538", + "ApplyFunction;": u"\u2061", + "Aring": u"\xc5", + "Aring;": u"\xc5", + "Ascr;": u"\U0001d49c", + "Assign;": u"\u2254", + "Atilde": u"\xc3", + "Atilde;": u"\xc3", + "Auml": u"\xc4", + "Auml;": u"\xc4", + "Backslash;": u"\u2216", + "Barv;": u"\u2ae7", + "Barwed;": u"\u2306", + "Bcy;": u"\u0411", + "Because;": u"\u2235", + "Bernoullis;": u"\u212c", + "Beta;": u"\u0392", + "Bfr;": u"\U0001d505", + "Bopf;": u"\U0001d539", + "Breve;": u"\u02d8", + "Bscr;": u"\u212c", + "Bumpeq;": u"\u224e", + "CHcy;": u"\u0427", + "COPY": u"\xa9", + "COPY;": u"\xa9", + "Cacute;": u"\u0106", + "Cap;": u"\u22d2", + "CapitalDifferentialD;": u"\u2145", + "Cayleys;": u"\u212d", + "Ccaron;": u"\u010c", + "Ccedil": u"\xc7", + "Ccedil;": u"\xc7", + "Ccirc;": u"\u0108", + "Cconint;": u"\u2230", + "Cdot;": u"\u010a", + "Cedilla;": u"\xb8", + "CenterDot;": u"\xb7", + "Cfr;": u"\u212d", + "Chi;": u"\u03a7", + "CircleDot;": u"\u2299", + "CircleMinus;": u"\u2296", + "CirclePlus;": u"\u2295", + "CircleTimes;": u"\u2297", + "ClockwiseContourIntegral;": u"\u2232", + "CloseCurlyDoubleQuote;": u"\u201d", + "CloseCurlyQuote;": u"\u2019", + "Colon;": u"\u2237", + "Colone;": u"\u2a74", + "Congruent;": u"\u2261", + "Conint;": u"\u222f", + "ContourIntegral;": u"\u222e", + "Copf;": u"\u2102", + "Coproduct;": u"\u2210", + "CounterClockwiseContourIntegral;": u"\u2233", + "Cross;": u"\u2a2f", + "Cscr;": u"\U0001d49e", + "Cup;": u"\u22d3", + "CupCap;": u"\u224d", + "DD;": u"\u2145", + "DDotrahd;": u"\u2911", + "DJcy;": u"\u0402", + "DScy;": u"\u0405", + "DZcy;": u"\u040f", + "Dagger;": u"\u2021", + "Darr;": u"\u21a1", + "Dashv;": u"\u2ae4", + "Dcaron;": u"\u010e", + "Dcy;": u"\u0414", + "Del;": u"\u2207", + "Delta;": u"\u0394", + "Dfr;": u"\U0001d507", + "DiacriticalAcute;": u"\xb4", + "DiacriticalDot;": u"\u02d9", + "DiacriticalDoubleAcute;": u"\u02dd", + "DiacriticalGrave;": u"`", + "DiacriticalTilde;": u"\u02dc", + "Diamond;": u"\u22c4", + "DifferentialD;": u"\u2146", + "Dopf;": u"\U0001d53b", + "Dot;": u"\xa8", + "DotDot;": u"\u20dc", + "DotEqual;": u"\u2250", + "DoubleContourIntegral;": u"\u222f", + "DoubleDot;": u"\xa8", + "DoubleDownArrow;": u"\u21d3", + "DoubleLeftArrow;": u"\u21d0", + "DoubleLeftRightArrow;": u"\u21d4", + "DoubleLeftTee;": u"\u2ae4", + "DoubleLongLeftArrow;": u"\u27f8", + "DoubleLongLeftRightArrow;": u"\u27fa", + "DoubleLongRightArrow;": u"\u27f9", + "DoubleRightArrow;": u"\u21d2", + "DoubleRightTee;": u"\u22a8", + "DoubleUpArrow;": u"\u21d1", + "DoubleUpDownArrow;": u"\u21d5", + "DoubleVerticalBar;": u"\u2225", + "DownArrow;": u"\u2193", + "DownArrowBar;": u"\u2913", + "DownArrowUpArrow;": u"\u21f5", + "DownBreve;": u"\u0311", + "DownLeftRightVector;": u"\u2950", + "DownLeftTeeVector;": u"\u295e", + "DownLeftVector;": u"\u21bd", + "DownLeftVectorBar;": u"\u2956", + "DownRightTeeVector;": u"\u295f", + "DownRightVector;": u"\u21c1", + "DownRightVectorBar;": u"\u2957", + "DownTee;": u"\u22a4", + "DownTeeArrow;": u"\u21a7", + "Downarrow;": u"\u21d3", + "Dscr;": u"\U0001d49f", + "Dstrok;": u"\u0110", + "ENG;": u"\u014a", + "ETH": u"\xd0", + "ETH;": u"\xd0", + "Eacute": u"\xc9", + "Eacute;": u"\xc9", + "Ecaron;": u"\u011a", + "Ecirc": u"\xca", + "Ecirc;": u"\xca", + "Ecy;": u"\u042d", + "Edot;": u"\u0116", + "Efr;": u"\U0001d508", + "Egrave": u"\xc8", + "Egrave;": u"\xc8", + "Element;": u"\u2208", + "Emacr;": u"\u0112", + "EmptySmallSquare;": u"\u25fb", + "EmptyVerySmallSquare;": u"\u25ab", + "Eogon;": u"\u0118", + "Eopf;": u"\U0001d53c", + "Epsilon;": u"\u0395", + "Equal;": u"\u2a75", + "EqualTilde;": u"\u2242", + "Equilibrium;": u"\u21cc", + "Escr;": u"\u2130", + "Esim;": u"\u2a73", + "Eta;": u"\u0397", + "Euml": u"\xcb", + "Euml;": u"\xcb", + "Exists;": u"\u2203", + "ExponentialE;": u"\u2147", + "Fcy;": u"\u0424", + "Ffr;": u"\U0001d509", + "FilledSmallSquare;": u"\u25fc", + "FilledVerySmallSquare;": u"\u25aa", + "Fopf;": u"\U0001d53d", + "ForAll;": u"\u2200", + "Fouriertrf;": u"\u2131", + "Fscr;": u"\u2131", + "GJcy;": u"\u0403", + "GT": u">", + "GT;": u">", + "Gamma;": u"\u0393", + "Gammad;": u"\u03dc", + "Gbreve;": u"\u011e", + "Gcedil;": u"\u0122", + "Gcirc;": u"\u011c", + "Gcy;": u"\u0413", + "Gdot;": u"\u0120", + "Gfr;": u"\U0001d50a", + "Gg;": u"\u22d9", + "Gopf;": u"\U0001d53e", + "GreaterEqual;": u"\u2265", + "GreaterEqualLess;": u"\u22db", + "GreaterFullEqual;": u"\u2267", + "GreaterGreater;": u"\u2aa2", + "GreaterLess;": u"\u2277", + "GreaterSlantEqual;": u"\u2a7e", + "GreaterTilde;": u"\u2273", + "Gscr;": u"\U0001d4a2", + "Gt;": u"\u226b", + "HARDcy;": u"\u042a", + "Hacek;": u"\u02c7", + "Hat;": u"^", + "Hcirc;": u"\u0124", + "Hfr;": u"\u210c", + "HilbertSpace;": u"\u210b", + "Hopf;": u"\u210d", + "HorizontalLine;": u"\u2500", + "Hscr;": u"\u210b", + "Hstrok;": u"\u0126", + "HumpDownHump;": u"\u224e", + "HumpEqual;": u"\u224f", + "IEcy;": u"\u0415", + "IJlig;": u"\u0132", + "IOcy;": u"\u0401", + "Iacute": u"\xcd", + "Iacute;": u"\xcd", + "Icirc": u"\xce", + "Icirc;": u"\xce", + "Icy;": u"\u0418", + "Idot;": u"\u0130", + "Ifr;": u"\u2111", + "Igrave": u"\xcc", + "Igrave;": u"\xcc", + "Im;": u"\u2111", + "Imacr;": u"\u012a", + "ImaginaryI;": u"\u2148", + "Implies;": u"\u21d2", + "Int;": u"\u222c", + "Integral;": u"\u222b", + "Intersection;": u"\u22c2", + "InvisibleComma;": u"\u2063", + "InvisibleTimes;": u"\u2062", + "Iogon;": u"\u012e", + "Iopf;": u"\U0001d540", + "Iota;": u"\u0399", + "Iscr;": u"\u2110", + "Itilde;": u"\u0128", + "Iukcy;": u"\u0406", + "Iuml": u"\xcf", + "Iuml;": u"\xcf", + "Jcirc;": u"\u0134", + "Jcy;": u"\u0419", + "Jfr;": u"\U0001d50d", + "Jopf;": u"\U0001d541", + "Jscr;": u"\U0001d4a5", + "Jsercy;": u"\u0408", + "Jukcy;": u"\u0404", + "KHcy;": u"\u0425", + "KJcy;": u"\u040c", + "Kappa;": u"\u039a", + "Kcedil;": u"\u0136", + "Kcy;": u"\u041a", + "Kfr;": u"\U0001d50e", + "Kopf;": u"\U0001d542", + "Kscr;": u"\U0001d4a6", + "LJcy;": u"\u0409", + "LT": u"<", + "LT;": u"<", + "Lacute;": u"\u0139", + "Lambda;": u"\u039b", + "Lang;": u"\u27ea", + "Laplacetrf;": u"\u2112", + "Larr;": u"\u219e", + "Lcaron;": u"\u013d", + "Lcedil;": u"\u013b", + "Lcy;": u"\u041b", + "LeftAngleBracket;": u"\u27e8", + "LeftArrow;": u"\u2190", + "LeftArrowBar;": u"\u21e4", + "LeftArrowRightArrow;": u"\u21c6", + "LeftCeiling;": u"\u2308", + "LeftDoubleBracket;": u"\u27e6", + "LeftDownTeeVector;": u"\u2961", + "LeftDownVector;": u"\u21c3", + "LeftDownVectorBar;": u"\u2959", + "LeftFloor;": u"\u230a", + "LeftRightArrow;": u"\u2194", + "LeftRightVector;": u"\u294e", + "LeftTee;": u"\u22a3", + "LeftTeeArrow;": u"\u21a4", + "LeftTeeVector;": u"\u295a", + "LeftTriangle;": u"\u22b2", + "LeftTriangleBar;": u"\u29cf", + "LeftTriangleEqual;": u"\u22b4", + "LeftUpDownVector;": u"\u2951", + "LeftUpTeeVector;": u"\u2960", + "LeftUpVector;": u"\u21bf", + "LeftUpVectorBar;": u"\u2958", + "LeftVector;": u"\u21bc", + "LeftVectorBar;": u"\u2952", + "Leftarrow;": u"\u21d0", + "Leftrightarrow;": u"\u21d4", + "LessEqualGreater;": u"\u22da", + "LessFullEqual;": u"\u2266", + "LessGreater;": u"\u2276", + "LessLess;": u"\u2aa1", + "LessSlantEqual;": u"\u2a7d", + "LessTilde;": u"\u2272", + "Lfr;": u"\U0001d50f", + "Ll;": u"\u22d8", + "Lleftarrow;": u"\u21da", + "Lmidot;": u"\u013f", + "LongLeftArrow;": u"\u27f5", + "LongLeftRightArrow;": u"\u27f7", + "LongRightArrow;": u"\u27f6", + "Longleftarrow;": u"\u27f8", + "Longleftrightarrow;": u"\u27fa", + "Longrightarrow;": u"\u27f9", + "Lopf;": u"\U0001d543", + "LowerLeftArrow;": u"\u2199", + "LowerRightArrow;": u"\u2198", + "Lscr;": u"\u2112", + "Lsh;": u"\u21b0", + "Lstrok;": u"\u0141", + "Lt;": u"\u226a", + "Map;": u"\u2905", + "Mcy;": u"\u041c", + "MediumSpace;": u"\u205f", + "Mellintrf;": u"\u2133", + "Mfr;": u"\U0001d510", + "MinusPlus;": u"\u2213", + "Mopf;": u"\U0001d544", + "Mscr;": u"\u2133", + "Mu;": u"\u039c", + "NJcy;": u"\u040a", + "Nacute;": u"\u0143", + "Ncaron;": u"\u0147", + "Ncedil;": u"\u0145", + "Ncy;": u"\u041d", + "NegativeMediumSpace;": u"\u200b", + "NegativeThickSpace;": u"\u200b", + "NegativeThinSpace;": u"\u200b", + "NegativeVeryThinSpace;": u"\u200b", + "NestedGreaterGreater;": u"\u226b", + "NestedLessLess;": u"\u226a", + "NewLine;": u"\n", + "Nfr;": u"\U0001d511", + "NoBreak;": u"\u2060", + "NonBreakingSpace;": u"\xa0", + "Nopf;": u"\u2115", + "Not;": u"\u2aec", + "NotCongruent;": u"\u2262", + "NotCupCap;": u"\u226d", + "NotDoubleVerticalBar;": u"\u2226", + "NotElement;": u"\u2209", + "NotEqual;": u"\u2260", + "NotEqualTilde;": u"\u2242\u0338", + "NotExists;": u"\u2204", + "NotGreater;": u"\u226f", + "NotGreaterEqual;": u"\u2271", + "NotGreaterFullEqual;": u"\u2267\u0338", + "NotGreaterGreater;": u"\u226b\u0338", + "NotGreaterLess;": u"\u2279", + "NotGreaterSlantEqual;": u"\u2a7e\u0338", + "NotGreaterTilde;": u"\u2275", + "NotHumpDownHump;": u"\u224e\u0338", + "NotHumpEqual;": u"\u224f\u0338", + "NotLeftTriangle;": u"\u22ea", + "NotLeftTriangleBar;": u"\u29cf\u0338", + "NotLeftTriangleEqual;": u"\u22ec", + "NotLess;": u"\u226e", + "NotLessEqual;": u"\u2270", + "NotLessGreater;": u"\u2278", + "NotLessLess;": u"\u226a\u0338", + "NotLessSlantEqual;": u"\u2a7d\u0338", + "NotLessTilde;": u"\u2274", + "NotNestedGreaterGreater;": u"\u2aa2\u0338", + "NotNestedLessLess;": u"\u2aa1\u0338", + "NotPrecedes;": u"\u2280", + "NotPrecedesEqual;": u"\u2aaf\u0338", + "NotPrecedesSlantEqual;": u"\u22e0", + "NotReverseElement;": u"\u220c", + "NotRightTriangle;": u"\u22eb", + "NotRightTriangleBar;": u"\u29d0\u0338", + "NotRightTriangleEqual;": u"\u22ed", + "NotSquareSubset;": u"\u228f\u0338", + "NotSquareSubsetEqual;": u"\u22e2", + "NotSquareSuperset;": u"\u2290\u0338", + "NotSquareSupersetEqual;": u"\u22e3", + "NotSubset;": u"\u2282\u20d2", + "NotSubsetEqual;": u"\u2288", + "NotSucceeds;": u"\u2281", + "NotSucceedsEqual;": u"\u2ab0\u0338", + "NotSucceedsSlantEqual;": u"\u22e1", + "NotSucceedsTilde;": u"\u227f\u0338", + "NotSuperset;": u"\u2283\u20d2", + "NotSupersetEqual;": u"\u2289", + "NotTilde;": u"\u2241", + "NotTildeEqual;": u"\u2244", + "NotTildeFullEqual;": u"\u2247", + "NotTildeTilde;": u"\u2249", + "NotVerticalBar;": u"\u2224", + "Nscr;": u"\U0001d4a9", + "Ntilde": u"\xd1", + "Ntilde;": u"\xd1", + "Nu;": u"\u039d", + "OElig;": u"\u0152", + "Oacute": u"\xd3", + "Oacute;": u"\xd3", + "Ocirc": u"\xd4", + "Ocirc;": u"\xd4", + "Ocy;": u"\u041e", + "Odblac;": u"\u0150", + "Ofr;": u"\U0001d512", + "Ograve": u"\xd2", + "Ograve;": u"\xd2", + "Omacr;": u"\u014c", + "Omega;": u"\u03a9", + "Omicron;": u"\u039f", + "Oopf;": u"\U0001d546", + "OpenCurlyDoubleQuote;": u"\u201c", + "OpenCurlyQuote;": u"\u2018", + "Or;": u"\u2a54", + "Oscr;": u"\U0001d4aa", + "Oslash": u"\xd8", + "Oslash;": u"\xd8", + "Otilde": u"\xd5", + "Otilde;": u"\xd5", + "Otimes;": u"\u2a37", + "Ouml": u"\xd6", + "Ouml;": u"\xd6", + "OverBar;": u"\u203e", + "OverBrace;": u"\u23de", + "OverBracket;": u"\u23b4", + "OverParenthesis;": u"\u23dc", + "PartialD;": u"\u2202", + "Pcy;": u"\u041f", + "Pfr;": u"\U0001d513", + "Phi;": u"\u03a6", + "Pi;": u"\u03a0", + "PlusMinus;": u"\xb1", + "Poincareplane;": u"\u210c", + "Popf;": u"\u2119", + "Pr;": u"\u2abb", + "Precedes;": u"\u227a", + "PrecedesEqual;": u"\u2aaf", + "PrecedesSlantEqual;": u"\u227c", + "PrecedesTilde;": u"\u227e", + "Prime;": u"\u2033", + "Product;": u"\u220f", + "Proportion;": u"\u2237", + "Proportional;": u"\u221d", + "Pscr;": u"\U0001d4ab", + "Psi;": u"\u03a8", + "QUOT": u"\"", + "QUOT;": u"\"", + "Qfr;": u"\U0001d514", + "Qopf;": u"\u211a", + "Qscr;": u"\U0001d4ac", + "RBarr;": u"\u2910", + "REG": u"\xae", + "REG;": u"\xae", + "Racute;": u"\u0154", + "Rang;": u"\u27eb", + "Rarr;": u"\u21a0", + "Rarrtl;": u"\u2916", + "Rcaron;": u"\u0158", + "Rcedil;": u"\u0156", + "Rcy;": u"\u0420", + "Re;": u"\u211c", + "ReverseElement;": u"\u220b", + "ReverseEquilibrium;": u"\u21cb", + "ReverseUpEquilibrium;": u"\u296f", + "Rfr;": u"\u211c", + "Rho;": u"\u03a1", + "RightAngleBracket;": u"\u27e9", + "RightArrow;": u"\u2192", + "RightArrowBar;": u"\u21e5", + "RightArrowLeftArrow;": u"\u21c4", + "RightCeiling;": u"\u2309", + "RightDoubleBracket;": u"\u27e7", + "RightDownTeeVector;": u"\u295d", + "RightDownVector;": u"\u21c2", + "RightDownVectorBar;": u"\u2955", + "RightFloor;": u"\u230b", + "RightTee;": u"\u22a2", + "RightTeeArrow;": u"\u21a6", + "RightTeeVector;": u"\u295b", + "RightTriangle;": u"\u22b3", + "RightTriangleBar;": u"\u29d0", + "RightTriangleEqual;": u"\u22b5", + "RightUpDownVector;": u"\u294f", + "RightUpTeeVector;": u"\u295c", + "RightUpVector;": u"\u21be", + "RightUpVectorBar;": u"\u2954", + "RightVector;": u"\u21c0", + "RightVectorBar;": u"\u2953", + "Rightarrow;": u"\u21d2", + "Ropf;": u"\u211d", + "RoundImplies;": u"\u2970", + "Rrightarrow;": u"\u21db", + "Rscr;": u"\u211b", + "Rsh;": u"\u21b1", + "RuleDelayed;": u"\u29f4", + "SHCHcy;": u"\u0429", + "SHcy;": u"\u0428", + "SOFTcy;": u"\u042c", + "Sacute;": u"\u015a", + "Sc;": u"\u2abc", + "Scaron;": u"\u0160", + "Scedil;": u"\u015e", + "Scirc;": u"\u015c", + "Scy;": u"\u0421", + "Sfr;": u"\U0001d516", + "ShortDownArrow;": u"\u2193", + "ShortLeftArrow;": u"\u2190", + "ShortRightArrow;": u"\u2192", + "ShortUpArrow;": u"\u2191", + "Sigma;": u"\u03a3", + "SmallCircle;": u"\u2218", + "Sopf;": u"\U0001d54a", + "Sqrt;": u"\u221a", + "Square;": u"\u25a1", + "SquareIntersection;": u"\u2293", + "SquareSubset;": u"\u228f", + "SquareSubsetEqual;": u"\u2291", + "SquareSuperset;": u"\u2290", + "SquareSupersetEqual;": u"\u2292", + "SquareUnion;": u"\u2294", + "Sscr;": u"\U0001d4ae", + "Star;": u"\u22c6", + "Sub;": u"\u22d0", + "Subset;": u"\u22d0", + "SubsetEqual;": u"\u2286", + "Succeeds;": u"\u227b", + "SucceedsEqual;": u"\u2ab0", + "SucceedsSlantEqual;": u"\u227d", + "SucceedsTilde;": u"\u227f", + "SuchThat;": u"\u220b", + "Sum;": u"\u2211", + "Sup;": u"\u22d1", + "Superset;": u"\u2283", + "SupersetEqual;": u"\u2287", + "Supset;": u"\u22d1", + "THORN": u"\xde", + "THORN;": u"\xde", + "TRADE;": u"\u2122", + "TSHcy;": u"\u040b", + "TScy;": u"\u0426", + "Tab;": u"\t", + "Tau;": u"\u03a4", + "Tcaron;": u"\u0164", + "Tcedil;": u"\u0162", + "Tcy;": u"\u0422", + "Tfr;": u"\U0001d517", + "Therefore;": u"\u2234", + "Theta;": u"\u0398", + "ThickSpace;": u"\u205f\u200a", + "ThinSpace;": u"\u2009", + "Tilde;": u"\u223c", + "TildeEqual;": u"\u2243", + "TildeFullEqual;": u"\u2245", + "TildeTilde;": u"\u2248", + "Topf;": u"\U0001d54b", + "TripleDot;": u"\u20db", + "Tscr;": u"\U0001d4af", + "Tstrok;": u"\u0166", + "Uacute": u"\xda", + "Uacute;": u"\xda", + "Uarr;": u"\u219f", + "Uarrocir;": u"\u2949", + "Ubrcy;": u"\u040e", + "Ubreve;": u"\u016c", + "Ucirc": u"\xdb", + "Ucirc;": u"\xdb", + "Ucy;": u"\u0423", + "Udblac;": u"\u0170", + "Ufr;": u"\U0001d518", + "Ugrave": u"\xd9", + "Ugrave;": u"\xd9", + "Umacr;": u"\u016a", + "UnderBar;": u"_", + "UnderBrace;": u"\u23df", + "UnderBracket;": u"\u23b5", + "UnderParenthesis;": u"\u23dd", + "Union;": u"\u22c3", + "UnionPlus;": u"\u228e", + "Uogon;": u"\u0172", + "Uopf;": u"\U0001d54c", + "UpArrow;": u"\u2191", + "UpArrowBar;": u"\u2912", + "UpArrowDownArrow;": u"\u21c5", + "UpDownArrow;": u"\u2195", + "UpEquilibrium;": u"\u296e", + "UpTee;": u"\u22a5", + "UpTeeArrow;": u"\u21a5", + "Uparrow;": u"\u21d1", + "Updownarrow;": u"\u21d5", + "UpperLeftArrow;": u"\u2196", + "UpperRightArrow;": u"\u2197", + "Upsi;": u"\u03d2", + "Upsilon;": u"\u03a5", + "Uring;": u"\u016e", + "Uscr;": u"\U0001d4b0", + "Utilde;": u"\u0168", + "Uuml": u"\xdc", + "Uuml;": u"\xdc", + "VDash;": u"\u22ab", + "Vbar;": u"\u2aeb", + "Vcy;": u"\u0412", + "Vdash;": u"\u22a9", + "Vdashl;": u"\u2ae6", + "Vee;": u"\u22c1", + "Verbar;": u"\u2016", + "Vert;": u"\u2016", + "VerticalBar;": u"\u2223", + "VerticalLine;": u"|", + "VerticalSeparator;": u"\u2758", + "VerticalTilde;": u"\u2240", + "VeryThinSpace;": u"\u200a", + "Vfr;": u"\U0001d519", + "Vopf;": u"\U0001d54d", + "Vscr;": u"\U0001d4b1", + "Vvdash;": u"\u22aa", + "Wcirc;": u"\u0174", + "Wedge;": u"\u22c0", + "Wfr;": u"\U0001d51a", + "Wopf;": u"\U0001d54e", + "Wscr;": u"\U0001d4b2", + "Xfr;": u"\U0001d51b", + "Xi;": u"\u039e", + "Xopf;": u"\U0001d54f", + "Xscr;": u"\U0001d4b3", + "YAcy;": u"\u042f", + "YIcy;": u"\u0407", + "YUcy;": u"\u042e", + "Yacute": u"\xdd", + "Yacute;": u"\xdd", + "Ycirc;": u"\u0176", + "Ycy;": u"\u042b", + "Yfr;": u"\U0001d51c", + "Yopf;": u"\U0001d550", + "Yscr;": u"\U0001d4b4", + "Yuml;": u"\u0178", + "ZHcy;": u"\u0416", + "Zacute;": u"\u0179", + "Zcaron;": u"\u017d", + "Zcy;": u"\u0417", + "Zdot;": u"\u017b", + "ZeroWidthSpace;": u"\u200b", + "Zeta;": u"\u0396", + "Zfr;": u"\u2128", + "Zopf;": u"\u2124", + "Zscr;": u"\U0001d4b5", + "aacute": u"\xe1", + "aacute;": u"\xe1", + "abreve;": u"\u0103", + "ac;": u"\u223e", + "acE;": u"\u223e\u0333", + "acd;": u"\u223f", + "acirc": u"\xe2", + "acirc;": u"\xe2", + "acute": u"\xb4", + "acute;": u"\xb4", + "acy;": u"\u0430", + "aelig": u"\xe6", + "aelig;": u"\xe6", + "af;": u"\u2061", + "afr;": u"\U0001d51e", + "agrave": u"\xe0", + "agrave;": u"\xe0", + "alefsym;": u"\u2135", + "aleph;": u"\u2135", + "alpha;": u"\u03b1", + "amacr;": u"\u0101", + "amalg;": u"\u2a3f", + "amp": u"&", + "amp;": u"&", + "and;": u"\u2227", + "andand;": u"\u2a55", + "andd;": u"\u2a5c", + "andslope;": u"\u2a58", + "andv;": u"\u2a5a", + "ang;": u"\u2220", + "ange;": u"\u29a4", + "angle;": u"\u2220", + "angmsd;": u"\u2221", + "angmsdaa;": u"\u29a8", + "angmsdab;": u"\u29a9", + "angmsdac;": u"\u29aa", + "angmsdad;": u"\u29ab", + "angmsdae;": u"\u29ac", + "angmsdaf;": u"\u29ad", + "angmsdag;": u"\u29ae", + "angmsdah;": u"\u29af", + "angrt;": u"\u221f", + "angrtvb;": u"\u22be", + "angrtvbd;": u"\u299d", + "angsph;": u"\u2222", + "angst;": u"\xc5", + "angzarr;": u"\u237c", + "aogon;": u"\u0105", + "aopf;": u"\U0001d552", + "ap;": u"\u2248", + "apE;": u"\u2a70", + "apacir;": u"\u2a6f", + "ape;": u"\u224a", + "apid;": u"\u224b", + "apos;": u"'", + "approx;": u"\u2248", + "approxeq;": u"\u224a", + "aring": u"\xe5", + "aring;": u"\xe5", + "ascr;": u"\U0001d4b6", + "ast;": u"*", + "asymp;": u"\u2248", + "asympeq;": u"\u224d", + "atilde": u"\xe3", + "atilde;": u"\xe3", + "auml": u"\xe4", + "auml;": u"\xe4", + "awconint;": u"\u2233", + "awint;": u"\u2a11", + "bNot;": u"\u2aed", + "backcong;": u"\u224c", + "backepsilon;": u"\u03f6", + "backprime;": u"\u2035", + "backsim;": u"\u223d", + "backsimeq;": u"\u22cd", + "barvee;": u"\u22bd", + "barwed;": u"\u2305", + "barwedge;": u"\u2305", + "bbrk;": u"\u23b5", + "bbrktbrk;": u"\u23b6", + "bcong;": u"\u224c", + "bcy;": u"\u0431", + "bdquo;": u"\u201e", + "becaus;": u"\u2235", + "because;": u"\u2235", + "bemptyv;": u"\u29b0", + "bepsi;": u"\u03f6", + "bernou;": u"\u212c", + "beta;": u"\u03b2", + "beth;": u"\u2136", + "between;": u"\u226c", + "bfr;": u"\U0001d51f", + "bigcap;": u"\u22c2", + "bigcirc;": u"\u25ef", + "bigcup;": u"\u22c3", + "bigodot;": u"\u2a00", + "bigoplus;": u"\u2a01", + "bigotimes;": u"\u2a02", + "bigsqcup;": u"\u2a06", + "bigstar;": u"\u2605", + "bigtriangledown;": u"\u25bd", + "bigtriangleup;": u"\u25b3", + "biguplus;": u"\u2a04", + "bigvee;": u"\u22c1", + "bigwedge;": u"\u22c0", + "bkarow;": u"\u290d", + "blacklozenge;": u"\u29eb", + "blacksquare;": u"\u25aa", + "blacktriangle;": u"\u25b4", + "blacktriangledown;": u"\u25be", + "blacktriangleleft;": u"\u25c2", + "blacktriangleright;": u"\u25b8", + "blank;": u"\u2423", + "blk12;": u"\u2592", + "blk14;": u"\u2591", + "blk34;": u"\u2593", + "block;": u"\u2588", + "bne;": u"=\u20e5", + "bnequiv;": u"\u2261\u20e5", + "bnot;": u"\u2310", + "bopf;": u"\U0001d553", + "bot;": u"\u22a5", + "bottom;": u"\u22a5", + "bowtie;": u"\u22c8", + "boxDL;": u"\u2557", + "boxDR;": u"\u2554", + "boxDl;": u"\u2556", + "boxDr;": u"\u2553", + "boxH;": u"\u2550", + "boxHD;": u"\u2566", + "boxHU;": u"\u2569", + "boxHd;": u"\u2564", + "boxHu;": u"\u2567", + "boxUL;": u"\u255d", + "boxUR;": u"\u255a", + "boxUl;": u"\u255c", + "boxUr;": u"\u2559", + "boxV;": u"\u2551", + "boxVH;": u"\u256c", + "boxVL;": u"\u2563", + "boxVR;": u"\u2560", + "boxVh;": u"\u256b", + "boxVl;": u"\u2562", + "boxVr;": u"\u255f", + "boxbox;": u"\u29c9", + "boxdL;": u"\u2555", + "boxdR;": u"\u2552", + "boxdl;": u"\u2510", + "boxdr;": u"\u250c", + "boxh;": u"\u2500", + "boxhD;": u"\u2565", + "boxhU;": u"\u2568", + "boxhd;": u"\u252c", + "boxhu;": u"\u2534", + "boxminus;": u"\u229f", + "boxplus;": u"\u229e", + "boxtimes;": u"\u22a0", + "boxuL;": u"\u255b", + "boxuR;": u"\u2558", + "boxul;": u"\u2518", + "boxur;": u"\u2514", + "boxv;": u"\u2502", + "boxvH;": u"\u256a", + "boxvL;": u"\u2561", + "boxvR;": u"\u255e", + "boxvh;": u"\u253c", + "boxvl;": u"\u2524", + "boxvr;": u"\u251c", + "bprime;": u"\u2035", + "breve;": u"\u02d8", + "brvbar": u"\xa6", + "brvbar;": u"\xa6", + "bscr;": u"\U0001d4b7", + "bsemi;": u"\u204f", + "bsim;": u"\u223d", + "bsime;": u"\u22cd", + "bsol;": u"\\", + "bsolb;": u"\u29c5", + "bsolhsub;": u"\u27c8", + "bull;": u"\u2022", + "bullet;": u"\u2022", + "bump;": u"\u224e", + "bumpE;": u"\u2aae", + "bumpe;": u"\u224f", + "bumpeq;": u"\u224f", + "cacute;": u"\u0107", + "cap;": u"\u2229", + "capand;": u"\u2a44", + "capbrcup;": u"\u2a49", + "capcap;": u"\u2a4b", + "capcup;": u"\u2a47", + "capdot;": u"\u2a40", + "caps;": u"\u2229\ufe00", + "caret;": u"\u2041", + "caron;": u"\u02c7", + "ccaps;": u"\u2a4d", + "ccaron;": u"\u010d", + "ccedil": u"\xe7", + "ccedil;": u"\xe7", + "ccirc;": u"\u0109", + "ccups;": u"\u2a4c", + "ccupssm;": u"\u2a50", + "cdot;": u"\u010b", + "cedil": u"\xb8", + "cedil;": u"\xb8", + "cemptyv;": u"\u29b2", + "cent": u"\xa2", + "cent;": u"\xa2", + "centerdot;": u"\xb7", + "cfr;": u"\U0001d520", + "chcy;": u"\u0447", + "check;": u"\u2713", + "checkmark;": u"\u2713", + "chi;": u"\u03c7", + "cir;": u"\u25cb", + "cirE;": u"\u29c3", + "circ;": u"\u02c6", + "circeq;": u"\u2257", + "circlearrowleft;": u"\u21ba", + "circlearrowright;": u"\u21bb", + "circledR;": u"\xae", + "circledS;": u"\u24c8", + "circledast;": u"\u229b", + "circledcirc;": u"\u229a", + "circleddash;": u"\u229d", + "cire;": u"\u2257", + "cirfnint;": u"\u2a10", + "cirmid;": u"\u2aef", + "cirscir;": u"\u29c2", + "clubs;": u"\u2663", + "clubsuit;": u"\u2663", + "colon;": u":", + "colone;": u"\u2254", + "coloneq;": u"\u2254", + "comma;": u",", + "commat;": u"@", + "comp;": u"\u2201", + "compfn;": u"\u2218", + "complement;": u"\u2201", + "complexes;": u"\u2102", + "cong;": u"\u2245", + "congdot;": u"\u2a6d", + "conint;": u"\u222e", + "copf;": u"\U0001d554", + "coprod;": u"\u2210", + "copy": u"\xa9", + "copy;": u"\xa9", + "copysr;": u"\u2117", + "crarr;": u"\u21b5", + "cross;": u"\u2717", + "cscr;": u"\U0001d4b8", + "csub;": u"\u2acf", + "csube;": u"\u2ad1", + "csup;": u"\u2ad0", + "csupe;": u"\u2ad2", + "ctdot;": u"\u22ef", + "cudarrl;": u"\u2938", + "cudarrr;": u"\u2935", + "cuepr;": u"\u22de", + "cuesc;": u"\u22df", + "cularr;": u"\u21b6", + "cularrp;": u"\u293d", + "cup;": u"\u222a", + "cupbrcap;": u"\u2a48", + "cupcap;": u"\u2a46", + "cupcup;": u"\u2a4a", + "cupdot;": u"\u228d", + "cupor;": u"\u2a45", + "cups;": u"\u222a\ufe00", + "curarr;": u"\u21b7", + "curarrm;": u"\u293c", + "curlyeqprec;": u"\u22de", + "curlyeqsucc;": u"\u22df", + "curlyvee;": u"\u22ce", + "curlywedge;": u"\u22cf", + "curren": u"\xa4", + "curren;": u"\xa4", + "curvearrowleft;": u"\u21b6", + "curvearrowright;": u"\u21b7", + "cuvee;": u"\u22ce", + "cuwed;": u"\u22cf", + "cwconint;": u"\u2232", + "cwint;": u"\u2231", + "cylcty;": u"\u232d", + "dArr;": u"\u21d3", + "dHar;": u"\u2965", + "dagger;": u"\u2020", + "daleth;": u"\u2138", + "darr;": u"\u2193", + "dash;": u"\u2010", + "dashv;": u"\u22a3", + "dbkarow;": u"\u290f", + "dblac;": u"\u02dd", + "dcaron;": u"\u010f", + "dcy;": u"\u0434", + "dd;": u"\u2146", + "ddagger;": u"\u2021", + "ddarr;": u"\u21ca", + "ddotseq;": u"\u2a77", + "deg": u"\xb0", + "deg;": u"\xb0", + "delta;": u"\u03b4", + "demptyv;": u"\u29b1", + "dfisht;": u"\u297f", + "dfr;": u"\U0001d521", + "dharl;": u"\u21c3", + "dharr;": u"\u21c2", + "diam;": u"\u22c4", + "diamond;": u"\u22c4", + "diamondsuit;": u"\u2666", + "diams;": u"\u2666", + "die;": u"\xa8", + "digamma;": u"\u03dd", + "disin;": u"\u22f2", + "div;": u"\xf7", + "divide": u"\xf7", + "divide;": u"\xf7", + "divideontimes;": u"\u22c7", + "divonx;": u"\u22c7", + "djcy;": u"\u0452", + "dlcorn;": u"\u231e", + "dlcrop;": u"\u230d", + "dollar;": u"$", + "dopf;": u"\U0001d555", + "dot;": u"\u02d9", + "doteq;": u"\u2250", + "doteqdot;": u"\u2251", + "dotminus;": u"\u2238", + "dotplus;": u"\u2214", + "dotsquare;": u"\u22a1", + "doublebarwedge;": u"\u2306", + "downarrow;": u"\u2193", + "downdownarrows;": u"\u21ca", + "downharpoonleft;": u"\u21c3", + "downharpoonright;": u"\u21c2", + "drbkarow;": u"\u2910", + "drcorn;": u"\u231f", + "drcrop;": u"\u230c", + "dscr;": u"\U0001d4b9", + "dscy;": u"\u0455", + "dsol;": u"\u29f6", + "dstrok;": u"\u0111", + "dtdot;": u"\u22f1", + "dtri;": u"\u25bf", + "dtrif;": u"\u25be", + "duarr;": u"\u21f5", + "duhar;": u"\u296f", + "dwangle;": u"\u29a6", + "dzcy;": u"\u045f", + "dzigrarr;": u"\u27ff", + "eDDot;": u"\u2a77", + "eDot;": u"\u2251", + "eacute": u"\xe9", + "eacute;": u"\xe9", + "easter;": u"\u2a6e", + "ecaron;": u"\u011b", + "ecir;": u"\u2256", + "ecirc": u"\xea", + "ecirc;": u"\xea", + "ecolon;": u"\u2255", + "ecy;": u"\u044d", + "edot;": u"\u0117", + "ee;": u"\u2147", + "efDot;": u"\u2252", + "efr;": u"\U0001d522", + "eg;": u"\u2a9a", + "egrave": u"\xe8", + "egrave;": u"\xe8", + "egs;": u"\u2a96", + "egsdot;": u"\u2a98", + "el;": u"\u2a99", + "elinters;": u"\u23e7", + "ell;": u"\u2113", + "els;": u"\u2a95", + "elsdot;": u"\u2a97", + "emacr;": u"\u0113", + "empty;": u"\u2205", + "emptyset;": u"\u2205", + "emptyv;": u"\u2205", + "emsp13;": u"\u2004", + "emsp14;": u"\u2005", + "emsp;": u"\u2003", + "eng;": u"\u014b", + "ensp;": u"\u2002", + "eogon;": u"\u0119", + "eopf;": u"\U0001d556", + "epar;": u"\u22d5", + "eparsl;": u"\u29e3", + "eplus;": u"\u2a71", + "epsi;": u"\u03b5", + "epsilon;": u"\u03b5", + "epsiv;": u"\u03f5", + "eqcirc;": u"\u2256", + "eqcolon;": u"\u2255", + "eqsim;": u"\u2242", + "eqslantgtr;": u"\u2a96", + "eqslantless;": u"\u2a95", + "equals;": u"=", + "equest;": u"\u225f", + "equiv;": u"\u2261", + "equivDD;": u"\u2a78", + "eqvparsl;": u"\u29e5", + "erDot;": u"\u2253", + "erarr;": u"\u2971", + "escr;": u"\u212f", + "esdot;": u"\u2250", + "esim;": u"\u2242", + "eta;": u"\u03b7", + "eth": u"\xf0", + "eth;": u"\xf0", + "euml": u"\xeb", + "euml;": u"\xeb", + "euro;": u"\u20ac", + "excl;": u"!", + "exist;": u"\u2203", + "expectation;": u"\u2130", + "exponentiale;": u"\u2147", + "fallingdotseq;": u"\u2252", + "fcy;": u"\u0444", + "female;": u"\u2640", + "ffilig;": u"\ufb03", + "fflig;": u"\ufb00", + "ffllig;": u"\ufb04", + "ffr;": u"\U0001d523", + "filig;": u"\ufb01", + "fjlig;": u"fj", + "flat;": u"\u266d", + "fllig;": u"\ufb02", + "fltns;": u"\u25b1", + "fnof;": u"\u0192", + "fopf;": u"\U0001d557", + "forall;": u"\u2200", + "fork;": u"\u22d4", + "forkv;": u"\u2ad9", + "fpartint;": u"\u2a0d", + "frac12": u"\xbd", + "frac12;": u"\xbd", + "frac13;": u"\u2153", + "frac14": u"\xbc", + "frac14;": u"\xbc", + "frac15;": u"\u2155", + "frac16;": u"\u2159", + "frac18;": u"\u215b", + "frac23;": u"\u2154", + "frac25;": u"\u2156", + "frac34": u"\xbe", + "frac34;": u"\xbe", + "frac35;": u"\u2157", + "frac38;": u"\u215c", + "frac45;": u"\u2158", + "frac56;": u"\u215a", + "frac58;": u"\u215d", + "frac78;": u"\u215e", + "frasl;": u"\u2044", + "frown;": u"\u2322", + "fscr;": u"\U0001d4bb", + "gE;": u"\u2267", + "gEl;": u"\u2a8c", + "gacute;": u"\u01f5", + "gamma;": u"\u03b3", + "gammad;": u"\u03dd", + "gap;": u"\u2a86", + "gbreve;": u"\u011f", + "gcirc;": u"\u011d", + "gcy;": u"\u0433", + "gdot;": u"\u0121", + "ge;": u"\u2265", + "gel;": u"\u22db", + "geq;": u"\u2265", + "geqq;": u"\u2267", + "geqslant;": u"\u2a7e", + "ges;": u"\u2a7e", + "gescc;": u"\u2aa9", + "gesdot;": u"\u2a80", + "gesdoto;": u"\u2a82", + "gesdotol;": u"\u2a84", + "gesl;": u"\u22db\ufe00", + "gesles;": u"\u2a94", + "gfr;": u"\U0001d524", + "gg;": u"\u226b", + "ggg;": u"\u22d9", + "gimel;": u"\u2137", + "gjcy;": u"\u0453", + "gl;": u"\u2277", + "glE;": u"\u2a92", + "gla;": u"\u2aa5", + "glj;": u"\u2aa4", + "gnE;": u"\u2269", + "gnap;": u"\u2a8a", + "gnapprox;": u"\u2a8a", + "gne;": u"\u2a88", + "gneq;": u"\u2a88", + "gneqq;": u"\u2269", + "gnsim;": u"\u22e7", + "gopf;": u"\U0001d558", + "grave;": u"`", + "gscr;": u"\u210a", + "gsim;": u"\u2273", + "gsime;": u"\u2a8e", + "gsiml;": u"\u2a90", + "gt": u">", + "gt;": u">", + "gtcc;": u"\u2aa7", + "gtcir;": u"\u2a7a", + "gtdot;": u"\u22d7", + "gtlPar;": u"\u2995", + "gtquest;": u"\u2a7c", + "gtrapprox;": u"\u2a86", + "gtrarr;": u"\u2978", + "gtrdot;": u"\u22d7", + "gtreqless;": u"\u22db", + "gtreqqless;": u"\u2a8c", + "gtrless;": u"\u2277", + "gtrsim;": u"\u2273", + "gvertneqq;": u"\u2269\ufe00", + "gvnE;": u"\u2269\ufe00", + "hArr;": u"\u21d4", + "hairsp;": u"\u200a", + "half;": u"\xbd", + "hamilt;": u"\u210b", + "hardcy;": u"\u044a", + "harr;": u"\u2194", + "harrcir;": u"\u2948", + "harrw;": u"\u21ad", + "hbar;": u"\u210f", + "hcirc;": u"\u0125", + "hearts;": u"\u2665", + "heartsuit;": u"\u2665", + "hellip;": u"\u2026", + "hercon;": u"\u22b9", + "hfr;": u"\U0001d525", + "hksearow;": u"\u2925", + "hkswarow;": u"\u2926", + "hoarr;": u"\u21ff", + "homtht;": u"\u223b", + "hookleftarrow;": u"\u21a9", + "hookrightarrow;": u"\u21aa", + "hopf;": u"\U0001d559", + "horbar;": u"\u2015", + "hscr;": u"\U0001d4bd", + "hslash;": u"\u210f", + "hstrok;": u"\u0127", + "hybull;": u"\u2043", + "hyphen;": u"\u2010", + "iacute": u"\xed", + "iacute;": u"\xed", + "ic;": u"\u2063", + "icirc": u"\xee", + "icirc;": u"\xee", + "icy;": u"\u0438", + "iecy;": u"\u0435", + "iexcl": u"\xa1", + "iexcl;": u"\xa1", + "iff;": u"\u21d4", + "ifr;": u"\U0001d526", + "igrave": u"\xec", + "igrave;": u"\xec", + "ii;": u"\u2148", + "iiiint;": u"\u2a0c", + "iiint;": u"\u222d", + "iinfin;": u"\u29dc", + "iiota;": u"\u2129", + "ijlig;": u"\u0133", + "imacr;": u"\u012b", + "image;": u"\u2111", + "imagline;": u"\u2110", + "imagpart;": u"\u2111", + "imath;": u"\u0131", + "imof;": u"\u22b7", + "imped;": u"\u01b5", + "in;": u"\u2208", + "incare;": u"\u2105", + "infin;": u"\u221e", + "infintie;": u"\u29dd", + "inodot;": u"\u0131", + "int;": u"\u222b", + "intcal;": u"\u22ba", + "integers;": u"\u2124", + "intercal;": u"\u22ba", + "intlarhk;": u"\u2a17", + "intprod;": u"\u2a3c", + "iocy;": u"\u0451", + "iogon;": u"\u012f", + "iopf;": u"\U0001d55a", + "iota;": u"\u03b9", + "iprod;": u"\u2a3c", + "iquest": u"\xbf", + "iquest;": u"\xbf", + "iscr;": u"\U0001d4be", + "isin;": u"\u2208", + "isinE;": u"\u22f9", + "isindot;": u"\u22f5", + "isins;": u"\u22f4", + "isinsv;": u"\u22f3", + "isinv;": u"\u2208", + "it;": u"\u2062", + "itilde;": u"\u0129", + "iukcy;": u"\u0456", + "iuml": u"\xef", + "iuml;": u"\xef", + "jcirc;": u"\u0135", + "jcy;": u"\u0439", + "jfr;": u"\U0001d527", + "jmath;": u"\u0237", + "jopf;": u"\U0001d55b", + "jscr;": u"\U0001d4bf", + "jsercy;": u"\u0458", + "jukcy;": u"\u0454", + "kappa;": u"\u03ba", + "kappav;": u"\u03f0", + "kcedil;": u"\u0137", + "kcy;": u"\u043a", + "kfr;": u"\U0001d528", + "kgreen;": u"\u0138", + "khcy;": u"\u0445", + "kjcy;": u"\u045c", + "kopf;": u"\U0001d55c", + "kscr;": u"\U0001d4c0", + "lAarr;": u"\u21da", + "lArr;": u"\u21d0", + "lAtail;": u"\u291b", + "lBarr;": u"\u290e", + "lE;": u"\u2266", + "lEg;": u"\u2a8b", + "lHar;": u"\u2962", + "lacute;": u"\u013a", + "laemptyv;": u"\u29b4", + "lagran;": u"\u2112", + "lambda;": u"\u03bb", + "lang;": u"\u27e8", + "langd;": u"\u2991", + "langle;": u"\u27e8", + "lap;": u"\u2a85", + "laquo": u"\xab", + "laquo;": u"\xab", + "larr;": u"\u2190", + "larrb;": u"\u21e4", + "larrbfs;": u"\u291f", + "larrfs;": u"\u291d", + "larrhk;": u"\u21a9", + "larrlp;": u"\u21ab", + "larrpl;": u"\u2939", + "larrsim;": u"\u2973", + "larrtl;": u"\u21a2", + "lat;": u"\u2aab", + "latail;": u"\u2919", + "late;": u"\u2aad", + "lates;": u"\u2aad\ufe00", + "lbarr;": u"\u290c", + "lbbrk;": u"\u2772", + "lbrace;": u"{", + "lbrack;": u"[", + "lbrke;": u"\u298b", + "lbrksld;": u"\u298f", + "lbrkslu;": u"\u298d", + "lcaron;": u"\u013e", + "lcedil;": u"\u013c", + "lceil;": u"\u2308", + "lcub;": u"{", + "lcy;": u"\u043b", + "ldca;": u"\u2936", + "ldquo;": u"\u201c", + "ldquor;": u"\u201e", + "ldrdhar;": u"\u2967", + "ldrushar;": u"\u294b", + "ldsh;": u"\u21b2", + "le;": u"\u2264", + "leftarrow;": u"\u2190", + "leftarrowtail;": u"\u21a2", + "leftharpoondown;": u"\u21bd", + "leftharpoonup;": u"\u21bc", + "leftleftarrows;": u"\u21c7", + "leftrightarrow;": u"\u2194", + "leftrightarrows;": u"\u21c6", + "leftrightharpoons;": u"\u21cb", + "leftrightsquigarrow;": u"\u21ad", + "leftthreetimes;": u"\u22cb", + "leg;": u"\u22da", + "leq;": u"\u2264", + "leqq;": u"\u2266", + "leqslant;": u"\u2a7d", + "les;": u"\u2a7d", + "lescc;": u"\u2aa8", + "lesdot;": u"\u2a7f", + "lesdoto;": u"\u2a81", + "lesdotor;": u"\u2a83", + "lesg;": u"\u22da\ufe00", + "lesges;": u"\u2a93", + "lessapprox;": u"\u2a85", + "lessdot;": u"\u22d6", + "lesseqgtr;": u"\u22da", + "lesseqqgtr;": u"\u2a8b", + "lessgtr;": u"\u2276", + "lesssim;": u"\u2272", + "lfisht;": u"\u297c", + "lfloor;": u"\u230a", + "lfr;": u"\U0001d529", + "lg;": u"\u2276", + "lgE;": u"\u2a91", + "lhard;": u"\u21bd", + "lharu;": u"\u21bc", + "lharul;": u"\u296a", + "lhblk;": u"\u2584", + "ljcy;": u"\u0459", + "ll;": u"\u226a", + "llarr;": u"\u21c7", + "llcorner;": u"\u231e", + "llhard;": u"\u296b", + "lltri;": u"\u25fa", + "lmidot;": u"\u0140", + "lmoust;": u"\u23b0", + "lmoustache;": u"\u23b0", + "lnE;": u"\u2268", + "lnap;": u"\u2a89", + "lnapprox;": u"\u2a89", + "lne;": u"\u2a87", + "lneq;": u"\u2a87", + "lneqq;": u"\u2268", + "lnsim;": u"\u22e6", + "loang;": u"\u27ec", + "loarr;": u"\u21fd", + "lobrk;": u"\u27e6", + "longleftarrow;": u"\u27f5", + "longleftrightarrow;": u"\u27f7", + "longmapsto;": u"\u27fc", + "longrightarrow;": u"\u27f6", + "looparrowleft;": u"\u21ab", + "looparrowright;": u"\u21ac", + "lopar;": u"\u2985", + "lopf;": u"\U0001d55d", + "loplus;": u"\u2a2d", + "lotimes;": u"\u2a34", + "lowast;": u"\u2217", + "lowbar;": u"_", + "loz;": u"\u25ca", + "lozenge;": u"\u25ca", + "lozf;": u"\u29eb", + "lpar;": u"(", + "lparlt;": u"\u2993", + "lrarr;": u"\u21c6", + "lrcorner;": u"\u231f", + "lrhar;": u"\u21cb", + "lrhard;": u"\u296d", + "lrm;": u"\u200e", + "lrtri;": u"\u22bf", + "lsaquo;": u"\u2039", + "lscr;": u"\U0001d4c1", + "lsh;": u"\u21b0", + "lsim;": u"\u2272", + "lsime;": u"\u2a8d", + "lsimg;": u"\u2a8f", + "lsqb;": u"[", + "lsquo;": u"\u2018", + "lsquor;": u"\u201a", + "lstrok;": u"\u0142", + "lt": u"<", + "lt;": u"<", + "ltcc;": u"\u2aa6", + "ltcir;": u"\u2a79", + "ltdot;": u"\u22d6", + "lthree;": u"\u22cb", + "ltimes;": u"\u22c9", + "ltlarr;": u"\u2976", + "ltquest;": u"\u2a7b", + "ltrPar;": u"\u2996", + "ltri;": u"\u25c3", + "ltrie;": u"\u22b4", + "ltrif;": u"\u25c2", + "lurdshar;": u"\u294a", + "luruhar;": u"\u2966", + "lvertneqq;": u"\u2268\ufe00", + "lvnE;": u"\u2268\ufe00", + "mDDot;": u"\u223a", + "macr": u"\xaf", + "macr;": u"\xaf", + "male;": u"\u2642", + "malt;": u"\u2720", + "maltese;": u"\u2720", + "map;": u"\u21a6", + "mapsto;": u"\u21a6", + "mapstodown;": u"\u21a7", + "mapstoleft;": u"\u21a4", + "mapstoup;": u"\u21a5", + "marker;": u"\u25ae", + "mcomma;": u"\u2a29", + "mcy;": u"\u043c", + "mdash;": u"\u2014", + "measuredangle;": u"\u2221", + "mfr;": u"\U0001d52a", + "mho;": u"\u2127", + "micro": u"\xb5", + "micro;": u"\xb5", + "mid;": u"\u2223", + "midast;": u"*", + "midcir;": u"\u2af0", + "middot": u"\xb7", + "middot;": u"\xb7", + "minus;": u"\u2212", + "minusb;": u"\u229f", + "minusd;": u"\u2238", + "minusdu;": u"\u2a2a", + "mlcp;": u"\u2adb", + "mldr;": u"\u2026", + "mnplus;": u"\u2213", + "models;": u"\u22a7", + "mopf;": u"\U0001d55e", + "mp;": u"\u2213", + "mscr;": u"\U0001d4c2", + "mstpos;": u"\u223e", + "mu;": u"\u03bc", + "multimap;": u"\u22b8", + "mumap;": u"\u22b8", + "nGg;": u"\u22d9\u0338", + "nGt;": u"\u226b\u20d2", + "nGtv;": u"\u226b\u0338", + "nLeftarrow;": u"\u21cd", + "nLeftrightarrow;": u"\u21ce", + "nLl;": u"\u22d8\u0338", + "nLt;": u"\u226a\u20d2", + "nLtv;": u"\u226a\u0338", + "nRightarrow;": u"\u21cf", + "nVDash;": u"\u22af", + "nVdash;": u"\u22ae", + "nabla;": u"\u2207", + "nacute;": u"\u0144", + "nang;": u"\u2220\u20d2", + "nap;": u"\u2249", + "napE;": u"\u2a70\u0338", + "napid;": u"\u224b\u0338", + "napos;": u"\u0149", + "napprox;": u"\u2249", + "natur;": u"\u266e", + "natural;": u"\u266e", + "naturals;": u"\u2115", + "nbsp": u"\xa0", + "nbsp;": u"\xa0", + "nbump;": u"\u224e\u0338", + "nbumpe;": u"\u224f\u0338", + "ncap;": u"\u2a43", + "ncaron;": u"\u0148", + "ncedil;": u"\u0146", + "ncong;": u"\u2247", + "ncongdot;": u"\u2a6d\u0338", + "ncup;": u"\u2a42", + "ncy;": u"\u043d", + "ndash;": u"\u2013", + "ne;": u"\u2260", + "neArr;": u"\u21d7", + "nearhk;": u"\u2924", + "nearr;": u"\u2197", + "nearrow;": u"\u2197", + "nedot;": u"\u2250\u0338", + "nequiv;": u"\u2262", + "nesear;": u"\u2928", + "nesim;": u"\u2242\u0338", + "nexist;": u"\u2204", + "nexists;": u"\u2204", + "nfr;": u"\U0001d52b", + "ngE;": u"\u2267\u0338", + "nge;": u"\u2271", + "ngeq;": u"\u2271", + "ngeqq;": u"\u2267\u0338", + "ngeqslant;": u"\u2a7e\u0338", + "nges;": u"\u2a7e\u0338", + "ngsim;": u"\u2275", + "ngt;": u"\u226f", + "ngtr;": u"\u226f", + "nhArr;": u"\u21ce", + "nharr;": u"\u21ae", + "nhpar;": u"\u2af2", + "ni;": u"\u220b", + "nis;": u"\u22fc", + "nisd;": u"\u22fa", + "niv;": u"\u220b", + "njcy;": u"\u045a", + "nlArr;": u"\u21cd", + "nlE;": u"\u2266\u0338", + "nlarr;": u"\u219a", + "nldr;": u"\u2025", + "nle;": u"\u2270", + "nleftarrow;": u"\u219a", + "nleftrightarrow;": u"\u21ae", + "nleq;": u"\u2270", + "nleqq;": u"\u2266\u0338", + "nleqslant;": u"\u2a7d\u0338", + "nles;": u"\u2a7d\u0338", + "nless;": u"\u226e", + "nlsim;": u"\u2274", + "nlt;": u"\u226e", + "nltri;": u"\u22ea", + "nltrie;": u"\u22ec", + "nmid;": u"\u2224", + "nopf;": u"\U0001d55f", + "not": u"\xac", + "not;": u"\xac", + "notin;": u"\u2209", + "notinE;": u"\u22f9\u0338", + "notindot;": u"\u22f5\u0338", + "notinva;": u"\u2209", + "notinvb;": u"\u22f7", + "notinvc;": u"\u22f6", + "notni;": u"\u220c", + "notniva;": u"\u220c", + "notnivb;": u"\u22fe", + "notnivc;": u"\u22fd", + "npar;": u"\u2226", + "nparallel;": u"\u2226", + "nparsl;": u"\u2afd\u20e5", + "npart;": u"\u2202\u0338", + "npolint;": u"\u2a14", + "npr;": u"\u2280", + "nprcue;": u"\u22e0", + "npre;": u"\u2aaf\u0338", + "nprec;": u"\u2280", + "npreceq;": u"\u2aaf\u0338", + "nrArr;": u"\u21cf", + "nrarr;": u"\u219b", + "nrarrc;": u"\u2933\u0338", + "nrarrw;": u"\u219d\u0338", + "nrightarrow;": u"\u219b", + "nrtri;": u"\u22eb", + "nrtrie;": u"\u22ed", + "nsc;": u"\u2281", + "nsccue;": u"\u22e1", + "nsce;": u"\u2ab0\u0338", + "nscr;": u"\U0001d4c3", + "nshortmid;": u"\u2224", + "nshortparallel;": u"\u2226", + "nsim;": u"\u2241", + "nsime;": u"\u2244", + "nsimeq;": u"\u2244", + "nsmid;": u"\u2224", + "nspar;": u"\u2226", + "nsqsube;": u"\u22e2", + "nsqsupe;": u"\u22e3", + "nsub;": u"\u2284", + "nsubE;": u"\u2ac5\u0338", + "nsube;": u"\u2288", + "nsubset;": u"\u2282\u20d2", + "nsubseteq;": u"\u2288", + "nsubseteqq;": u"\u2ac5\u0338", + "nsucc;": u"\u2281", + "nsucceq;": u"\u2ab0\u0338", + "nsup;": u"\u2285", + "nsupE;": u"\u2ac6\u0338", + "nsupe;": u"\u2289", + "nsupset;": u"\u2283\u20d2", + "nsupseteq;": u"\u2289", + "nsupseteqq;": u"\u2ac6\u0338", + "ntgl;": u"\u2279", + "ntilde": u"\xf1", + "ntilde;": u"\xf1", + "ntlg;": u"\u2278", + "ntriangleleft;": u"\u22ea", + "ntrianglelefteq;": u"\u22ec", + "ntriangleright;": u"\u22eb", + "ntrianglerighteq;": u"\u22ed", + "nu;": u"\u03bd", + "num;": u"#", + "numero;": u"\u2116", + "numsp;": u"\u2007", + "nvDash;": u"\u22ad", + "nvHarr;": u"\u2904", + "nvap;": u"\u224d\u20d2", + "nvdash;": u"\u22ac", + "nvge;": u"\u2265\u20d2", + "nvgt;": u">\u20d2", + "nvinfin;": u"\u29de", + "nvlArr;": u"\u2902", + "nvle;": u"\u2264\u20d2", + "nvlt;": u"<\u20d2", + "nvltrie;": u"\u22b4\u20d2", + "nvrArr;": u"\u2903", + "nvrtrie;": u"\u22b5\u20d2", + "nvsim;": u"\u223c\u20d2", + "nwArr;": u"\u21d6", + "nwarhk;": u"\u2923", + "nwarr;": u"\u2196", + "nwarrow;": u"\u2196", + "nwnear;": u"\u2927", + "oS;": u"\u24c8", + "oacute": u"\xf3", + "oacute;": u"\xf3", + "oast;": u"\u229b", + "ocir;": u"\u229a", + "ocirc": u"\xf4", + "ocirc;": u"\xf4", + "ocy;": u"\u043e", + "odash;": u"\u229d", + "odblac;": u"\u0151", + "odiv;": u"\u2a38", + "odot;": u"\u2299", + "odsold;": u"\u29bc", + "oelig;": u"\u0153", + "ofcir;": u"\u29bf", + "ofr;": u"\U0001d52c", + "ogon;": u"\u02db", + "ograve": u"\xf2", + "ograve;": u"\xf2", + "ogt;": u"\u29c1", + "ohbar;": u"\u29b5", + "ohm;": u"\u03a9", + "oint;": u"\u222e", + "olarr;": u"\u21ba", + "olcir;": u"\u29be", + "olcross;": u"\u29bb", + "oline;": u"\u203e", + "olt;": u"\u29c0", + "omacr;": u"\u014d", + "omega;": u"\u03c9", + "omicron;": u"\u03bf", + "omid;": u"\u29b6", + "ominus;": u"\u2296", + "oopf;": u"\U0001d560", + "opar;": u"\u29b7", + "operp;": u"\u29b9", + "oplus;": u"\u2295", + "or;": u"\u2228", + "orarr;": u"\u21bb", + "ord;": u"\u2a5d", + "order;": u"\u2134", + "orderof;": u"\u2134", + "ordf": u"\xaa", + "ordf;": u"\xaa", + "ordm": u"\xba", + "ordm;": u"\xba", + "origof;": u"\u22b6", + "oror;": u"\u2a56", + "orslope;": u"\u2a57", + "orv;": u"\u2a5b", + "oscr;": u"\u2134", + "oslash": u"\xf8", + "oslash;": u"\xf8", + "osol;": u"\u2298", + "otilde": u"\xf5", + "otilde;": u"\xf5", + "otimes;": u"\u2297", + "otimesas;": u"\u2a36", + "ouml": u"\xf6", + "ouml;": u"\xf6", + "ovbar;": u"\u233d", + "par;": u"\u2225", + "para": u"\xb6", + "para;": u"\xb6", + "parallel;": u"\u2225", + "parsim;": u"\u2af3", + "parsl;": u"\u2afd", + "part;": u"\u2202", + "pcy;": u"\u043f", + "percnt;": u"%", + "period;": u".", + "permil;": u"\u2030", + "perp;": u"\u22a5", + "pertenk;": u"\u2031", + "pfr;": u"\U0001d52d", + "phi;": u"\u03c6", + "phiv;": u"\u03d5", + "phmmat;": u"\u2133", + "phone;": u"\u260e", + "pi;": u"\u03c0", + "pitchfork;": u"\u22d4", + "piv;": u"\u03d6", + "planck;": u"\u210f", + "planckh;": u"\u210e", + "plankv;": u"\u210f", + "plus;": u"+", + "plusacir;": u"\u2a23", + "plusb;": u"\u229e", + "pluscir;": u"\u2a22", + "plusdo;": u"\u2214", + "plusdu;": u"\u2a25", + "pluse;": u"\u2a72", + "plusmn": u"\xb1", + "plusmn;": u"\xb1", + "plussim;": u"\u2a26", + "plustwo;": u"\u2a27", + "pm;": u"\xb1", + "pointint;": u"\u2a15", + "popf;": u"\U0001d561", + "pound": u"\xa3", + "pound;": u"\xa3", + "pr;": u"\u227a", + "prE;": u"\u2ab3", + "prap;": u"\u2ab7", + "prcue;": u"\u227c", + "pre;": u"\u2aaf", + "prec;": u"\u227a", + "precapprox;": u"\u2ab7", + "preccurlyeq;": u"\u227c", + "preceq;": u"\u2aaf", + "precnapprox;": u"\u2ab9", + "precneqq;": u"\u2ab5", + "precnsim;": u"\u22e8", + "precsim;": u"\u227e", + "prime;": u"\u2032", + "primes;": u"\u2119", + "prnE;": u"\u2ab5", + "prnap;": u"\u2ab9", + "prnsim;": u"\u22e8", + "prod;": u"\u220f", + "profalar;": u"\u232e", + "profline;": u"\u2312", + "profsurf;": u"\u2313", + "prop;": u"\u221d", + "propto;": u"\u221d", + "prsim;": u"\u227e", + "prurel;": u"\u22b0", + "pscr;": u"\U0001d4c5", + "psi;": u"\u03c8", + "puncsp;": u"\u2008", + "qfr;": u"\U0001d52e", + "qint;": u"\u2a0c", + "qopf;": u"\U0001d562", + "qprime;": u"\u2057", + "qscr;": u"\U0001d4c6", + "quaternions;": u"\u210d", + "quatint;": u"\u2a16", + "quest;": u"?", + "questeq;": u"\u225f", + "quot": u"\"", + "quot;": u"\"", + "rAarr;": u"\u21db", + "rArr;": u"\u21d2", + "rAtail;": u"\u291c", + "rBarr;": u"\u290f", + "rHar;": u"\u2964", + "race;": u"\u223d\u0331", + "racute;": u"\u0155", + "radic;": u"\u221a", + "raemptyv;": u"\u29b3", + "rang;": u"\u27e9", + "rangd;": u"\u2992", + "range;": u"\u29a5", + "rangle;": u"\u27e9", + "raquo": u"\xbb", + "raquo;": u"\xbb", + "rarr;": u"\u2192", + "rarrap;": u"\u2975", + "rarrb;": u"\u21e5", + "rarrbfs;": u"\u2920", + "rarrc;": u"\u2933", + "rarrfs;": u"\u291e", + "rarrhk;": u"\u21aa", + "rarrlp;": u"\u21ac", + "rarrpl;": u"\u2945", + "rarrsim;": u"\u2974", + "rarrtl;": u"\u21a3", + "rarrw;": u"\u219d", + "ratail;": u"\u291a", + "ratio;": u"\u2236", + "rationals;": u"\u211a", + "rbarr;": u"\u290d", + "rbbrk;": u"\u2773", + "rbrace;": u"}", + "rbrack;": u"]", + "rbrke;": u"\u298c", + "rbrksld;": u"\u298e", + "rbrkslu;": u"\u2990", + "rcaron;": u"\u0159", + "rcedil;": u"\u0157", + "rceil;": u"\u2309", + "rcub;": u"}", + "rcy;": u"\u0440", + "rdca;": u"\u2937", + "rdldhar;": u"\u2969", + "rdquo;": u"\u201d", + "rdquor;": u"\u201d", + "rdsh;": u"\u21b3", + "real;": u"\u211c", + "realine;": u"\u211b", + "realpart;": u"\u211c", + "reals;": u"\u211d", + "rect;": u"\u25ad", + "reg": u"\xae", + "reg;": u"\xae", + "rfisht;": u"\u297d", + "rfloor;": u"\u230b", + "rfr;": u"\U0001d52f", + "rhard;": u"\u21c1", + "rharu;": u"\u21c0", + "rharul;": u"\u296c", + "rho;": u"\u03c1", + "rhov;": u"\u03f1", + "rightarrow;": u"\u2192", + "rightarrowtail;": u"\u21a3", + "rightharpoondown;": u"\u21c1", + "rightharpoonup;": u"\u21c0", + "rightleftarrows;": u"\u21c4", + "rightleftharpoons;": u"\u21cc", + "rightrightarrows;": u"\u21c9", + "rightsquigarrow;": u"\u219d", + "rightthreetimes;": u"\u22cc", + "ring;": u"\u02da", + "risingdotseq;": u"\u2253", + "rlarr;": u"\u21c4", + "rlhar;": u"\u21cc", + "rlm;": u"\u200f", + "rmoust;": u"\u23b1", + "rmoustache;": u"\u23b1", + "rnmid;": u"\u2aee", + "roang;": u"\u27ed", + "roarr;": u"\u21fe", + "robrk;": u"\u27e7", + "ropar;": u"\u2986", + "ropf;": u"\U0001d563", + "roplus;": u"\u2a2e", + "rotimes;": u"\u2a35", + "rpar;": u")", + "rpargt;": u"\u2994", + "rppolint;": u"\u2a12", + "rrarr;": u"\u21c9", + "rsaquo;": u"\u203a", + "rscr;": u"\U0001d4c7", + "rsh;": u"\u21b1", + "rsqb;": u"]", + "rsquo;": u"\u2019", + "rsquor;": u"\u2019", + "rthree;": u"\u22cc", + "rtimes;": u"\u22ca", + "rtri;": u"\u25b9", + "rtrie;": u"\u22b5", + "rtrif;": u"\u25b8", + "rtriltri;": u"\u29ce", + "ruluhar;": u"\u2968", + "rx;": u"\u211e", + "sacute;": u"\u015b", + "sbquo;": u"\u201a", + "sc;": u"\u227b", + "scE;": u"\u2ab4", + "scap;": u"\u2ab8", + "scaron;": u"\u0161", + "sccue;": u"\u227d", + "sce;": u"\u2ab0", + "scedil;": u"\u015f", + "scirc;": u"\u015d", + "scnE;": u"\u2ab6", + "scnap;": u"\u2aba", + "scnsim;": u"\u22e9", + "scpolint;": u"\u2a13", + "scsim;": u"\u227f", + "scy;": u"\u0441", + "sdot;": u"\u22c5", + "sdotb;": u"\u22a1", + "sdote;": u"\u2a66", + "seArr;": u"\u21d8", + "searhk;": u"\u2925", + "searr;": u"\u2198", + "searrow;": u"\u2198", + "sect": u"\xa7", + "sect;": u"\xa7", + "semi;": u";", + "seswar;": u"\u2929", + "setminus;": u"\u2216", + "setmn;": u"\u2216", + "sext;": u"\u2736", + "sfr;": u"\U0001d530", + "sfrown;": u"\u2322", + "sharp;": u"\u266f", + "shchcy;": u"\u0449", + "shcy;": u"\u0448", + "shortmid;": u"\u2223", + "shortparallel;": u"\u2225", + "shy": u"\xad", + "shy;": u"\xad", + "sigma;": u"\u03c3", + "sigmaf;": u"\u03c2", + "sigmav;": u"\u03c2", + "sim;": u"\u223c", + "simdot;": u"\u2a6a", + "sime;": u"\u2243", + "simeq;": u"\u2243", + "simg;": u"\u2a9e", + "simgE;": u"\u2aa0", + "siml;": u"\u2a9d", + "simlE;": u"\u2a9f", + "simne;": u"\u2246", + "simplus;": u"\u2a24", + "simrarr;": u"\u2972", + "slarr;": u"\u2190", + "smallsetminus;": u"\u2216", + "smashp;": u"\u2a33", + "smeparsl;": u"\u29e4", + "smid;": u"\u2223", + "smile;": u"\u2323", + "smt;": u"\u2aaa", + "smte;": u"\u2aac", + "smtes;": u"\u2aac\ufe00", + "softcy;": u"\u044c", + "sol;": u"/", + "solb;": u"\u29c4", + "solbar;": u"\u233f", + "sopf;": u"\U0001d564", + "spades;": u"\u2660", + "spadesuit;": u"\u2660", + "spar;": u"\u2225", + "sqcap;": u"\u2293", + "sqcaps;": u"\u2293\ufe00", + "sqcup;": u"\u2294", + "sqcups;": u"\u2294\ufe00", + "sqsub;": u"\u228f", + "sqsube;": u"\u2291", + "sqsubset;": u"\u228f", + "sqsubseteq;": u"\u2291", + "sqsup;": u"\u2290", + "sqsupe;": u"\u2292", + "sqsupset;": u"\u2290", + "sqsupseteq;": u"\u2292", + "squ;": u"\u25a1", + "square;": u"\u25a1", + "squarf;": u"\u25aa", + "squf;": u"\u25aa", + "srarr;": u"\u2192", + "sscr;": u"\U0001d4c8", + "ssetmn;": u"\u2216", + "ssmile;": u"\u2323", + "sstarf;": u"\u22c6", + "star;": u"\u2606", + "starf;": u"\u2605", + "straightepsilon;": u"\u03f5", + "straightphi;": u"\u03d5", + "strns;": u"\xaf", + "sub;": u"\u2282", + "subE;": u"\u2ac5", + "subdot;": u"\u2abd", + "sube;": u"\u2286", + "subedot;": u"\u2ac3", + "submult;": u"\u2ac1", + "subnE;": u"\u2acb", + "subne;": u"\u228a", + "subplus;": u"\u2abf", + "subrarr;": u"\u2979", + "subset;": u"\u2282", + "subseteq;": u"\u2286", + "subseteqq;": u"\u2ac5", + "subsetneq;": u"\u228a", + "subsetneqq;": u"\u2acb", + "subsim;": u"\u2ac7", + "subsub;": u"\u2ad5", + "subsup;": u"\u2ad3", + "succ;": u"\u227b", + "succapprox;": u"\u2ab8", + "succcurlyeq;": u"\u227d", + "succeq;": u"\u2ab0", + "succnapprox;": u"\u2aba", + "succneqq;": u"\u2ab6", + "succnsim;": u"\u22e9", + "succsim;": u"\u227f", + "sum;": u"\u2211", + "sung;": u"\u266a", + "sup1": u"\xb9", + "sup1;": u"\xb9", + "sup2": u"\xb2", + "sup2;": u"\xb2", + "sup3": u"\xb3", + "sup3;": u"\xb3", + "sup;": u"\u2283", + "supE;": u"\u2ac6", + "supdot;": u"\u2abe", + "supdsub;": u"\u2ad8", + "supe;": u"\u2287", + "supedot;": u"\u2ac4", + "suphsol;": u"\u27c9", + "suphsub;": u"\u2ad7", + "suplarr;": u"\u297b", + "supmult;": u"\u2ac2", + "supnE;": u"\u2acc", + "supne;": u"\u228b", + "supplus;": u"\u2ac0", + "supset;": u"\u2283", + "supseteq;": u"\u2287", + "supseteqq;": u"\u2ac6", + "supsetneq;": u"\u228b", + "supsetneqq;": u"\u2acc", + "supsim;": u"\u2ac8", + "supsub;": u"\u2ad4", + "supsup;": u"\u2ad6", + "swArr;": u"\u21d9", + "swarhk;": u"\u2926", + "swarr;": u"\u2199", + "swarrow;": u"\u2199", + "swnwar;": u"\u292a", + "szlig": u"\xdf", + "szlig;": u"\xdf", + "target;": u"\u2316", + "tau;": u"\u03c4", + "tbrk;": u"\u23b4", + "tcaron;": u"\u0165", + "tcedil;": u"\u0163", + "tcy;": u"\u0442", + "tdot;": u"\u20db", + "telrec;": u"\u2315", + "tfr;": u"\U0001d531", + "there4;": u"\u2234", + "therefore;": u"\u2234", + "theta;": u"\u03b8", + "thetasym;": u"\u03d1", + "thetav;": u"\u03d1", + "thickapprox;": u"\u2248", + "thicksim;": u"\u223c", + "thinsp;": u"\u2009", + "thkap;": u"\u2248", + "thksim;": u"\u223c", + "thorn": u"\xfe", + "thorn;": u"\xfe", + "tilde;": u"\u02dc", + "times": u"\xd7", + "times;": u"\xd7", + "timesb;": u"\u22a0", + "timesbar;": u"\u2a31", + "timesd;": u"\u2a30", + "tint;": u"\u222d", + "toea;": u"\u2928", + "top;": u"\u22a4", + "topbot;": u"\u2336", + "topcir;": u"\u2af1", + "topf;": u"\U0001d565", + "topfork;": u"\u2ada", + "tosa;": u"\u2929", + "tprime;": u"\u2034", + "trade;": u"\u2122", + "triangle;": u"\u25b5", + "triangledown;": u"\u25bf", + "triangleleft;": u"\u25c3", + "trianglelefteq;": u"\u22b4", + "triangleq;": u"\u225c", + "triangleright;": u"\u25b9", + "trianglerighteq;": u"\u22b5", + "tridot;": u"\u25ec", + "trie;": u"\u225c", + "triminus;": u"\u2a3a", + "triplus;": u"\u2a39", + "trisb;": u"\u29cd", + "tritime;": u"\u2a3b", + "trpezium;": u"\u23e2", + "tscr;": u"\U0001d4c9", + "tscy;": u"\u0446", + "tshcy;": u"\u045b", + "tstrok;": u"\u0167", + "twixt;": u"\u226c", + "twoheadleftarrow;": u"\u219e", + "twoheadrightarrow;": u"\u21a0", + "uArr;": u"\u21d1", + "uHar;": u"\u2963", + "uacute": u"\xfa", + "uacute;": u"\xfa", + "uarr;": u"\u2191", + "ubrcy;": u"\u045e", + "ubreve;": u"\u016d", + "ucirc": u"\xfb", + "ucirc;": u"\xfb", + "ucy;": u"\u0443", + "udarr;": u"\u21c5", + "udblac;": u"\u0171", + "udhar;": u"\u296e", + "ufisht;": u"\u297e", + "ufr;": u"\U0001d532", + "ugrave": u"\xf9", + "ugrave;": u"\xf9", + "uharl;": u"\u21bf", + "uharr;": u"\u21be", + "uhblk;": u"\u2580", + "ulcorn;": u"\u231c", + "ulcorner;": u"\u231c", + "ulcrop;": u"\u230f", + "ultri;": u"\u25f8", + "umacr;": u"\u016b", + "uml": u"\xa8", + "uml;": u"\xa8", + "uogon;": u"\u0173", + "uopf;": u"\U0001d566", + "uparrow;": u"\u2191", + "updownarrow;": u"\u2195", + "upharpoonleft;": u"\u21bf", + "upharpoonright;": u"\u21be", + "uplus;": u"\u228e", + "upsi;": u"\u03c5", + "upsih;": u"\u03d2", + "upsilon;": u"\u03c5", + "upuparrows;": u"\u21c8", + "urcorn;": u"\u231d", + "urcorner;": u"\u231d", + "urcrop;": u"\u230e", + "uring;": u"\u016f", + "urtri;": u"\u25f9", + "uscr;": u"\U0001d4ca", + "utdot;": u"\u22f0", + "utilde;": u"\u0169", + "utri;": u"\u25b5", + "utrif;": u"\u25b4", + "uuarr;": u"\u21c8", + "uuml": u"\xfc", + "uuml;": u"\xfc", + "uwangle;": u"\u29a7", + "vArr;": u"\u21d5", + "vBar;": u"\u2ae8", + "vBarv;": u"\u2ae9", + "vDash;": u"\u22a8", + "vangrt;": u"\u299c", + "varepsilon;": u"\u03f5", + "varkappa;": u"\u03f0", + "varnothing;": u"\u2205", + "varphi;": u"\u03d5", + "varpi;": u"\u03d6", + "varpropto;": u"\u221d", + "varr;": u"\u2195", + "varrho;": u"\u03f1", + "varsigma;": u"\u03c2", + "varsubsetneq;": u"\u228a\ufe00", + "varsubsetneqq;": u"\u2acb\ufe00", + "varsupsetneq;": u"\u228b\ufe00", + "varsupsetneqq;": u"\u2acc\ufe00", + "vartheta;": u"\u03d1", + "vartriangleleft;": u"\u22b2", + "vartriangleright;": u"\u22b3", + "vcy;": u"\u0432", + "vdash;": u"\u22a2", + "vee;": u"\u2228", + "veebar;": u"\u22bb", + "veeeq;": u"\u225a", + "vellip;": u"\u22ee", + "verbar;": u"|", + "vert;": u"|", + "vfr;": u"\U0001d533", + "vltri;": u"\u22b2", + "vnsub;": u"\u2282\u20d2", + "vnsup;": u"\u2283\u20d2", + "vopf;": u"\U0001d567", + "vprop;": u"\u221d", + "vrtri;": u"\u22b3", + "vscr;": u"\U0001d4cb", + "vsubnE;": u"\u2acb\ufe00", + "vsubne;": u"\u228a\ufe00", + "vsupnE;": u"\u2acc\ufe00", + "vsupne;": u"\u228b\ufe00", + "vzigzag;": u"\u299a", + "wcirc;": u"\u0175", + "wedbar;": u"\u2a5f", + "wedge;": u"\u2227", + "wedgeq;": u"\u2259", + "weierp;": u"\u2118", + "wfr;": u"\U0001d534", + "wopf;": u"\U0001d568", + "wp;": u"\u2118", + "wr;": u"\u2240", + "wreath;": u"\u2240", + "wscr;": u"\U0001d4cc", + "xcap;": u"\u22c2", + "xcirc;": u"\u25ef", + "xcup;": u"\u22c3", + "xdtri;": u"\u25bd", + "xfr;": u"\U0001d535", + "xhArr;": u"\u27fa", + "xharr;": u"\u27f7", + "xi;": u"\u03be", + "xlArr;": u"\u27f8", + "xlarr;": u"\u27f5", + "xmap;": u"\u27fc", + "xnis;": u"\u22fb", + "xodot;": u"\u2a00", + "xopf;": u"\U0001d569", + "xoplus;": u"\u2a01", + "xotime;": u"\u2a02", + "xrArr;": u"\u27f9", + "xrarr;": u"\u27f6", + "xscr;": u"\U0001d4cd", + "xsqcup;": u"\u2a06", + "xuplus;": u"\u2a04", + "xutri;": u"\u25b3", + "xvee;": u"\u22c1", + "xwedge;": u"\u22c0", + "yacute": u"\xfd", + "yacute;": u"\xfd", + "yacy;": u"\u044f", + "ycirc;": u"\u0177", + "ycy;": u"\u044b", + "yen": u"\xa5", + "yen;": u"\xa5", + "yfr;": u"\U0001d536", + "yicy;": u"\u0457", + "yopf;": u"\U0001d56a", + "yscr;": u"\U0001d4ce", + "yucy;": u"\u044e", + "yuml": u"\xff", + "yuml;": u"\xff", + "zacute;": u"\u017a", + "zcaron;": u"\u017e", + "zcy;": u"\u0437", + "zdot;": u"\u017c", + "zeetrf;": u"\u2128", + "zeta;": u"\u03b6", + "zfr;": u"\U0001d537", + "zhcy;": u"\u0436", + "zigrarr;": u"\u21dd", + "zopf;": u"\U0001d56b", + "zscr;": u"\U0001d4cf", + "zwj;": u"\u200d", + "zwnj;": u"\u200c", +} + +replacementCharacters = { + 0x0:u"\uFFFD", + 0x0d:u"\u000D", + 0x80:u"\u20AC", + 0x81:u"\u0081", + 0x81:u"\u0081", + 0x82:u"\u201A", + 0x83:u"\u0192", + 0x84:u"\u201E", + 0x85:u"\u2026", + 0x86:u"\u2020", + 0x87:u"\u2021", + 0x88:u"\u02C6", + 0x89:u"\u2030", + 0x8A:u"\u0160", + 0x8B:u"\u2039", + 0x8C:u"\u0152", + 0x8D:u"\u008D", + 0x8E:u"\u017D", + 0x8F:u"\u008F", + 0x90:u"\u0090", + 0x91:u"\u2018", + 0x92:u"\u2019", + 0x93:u"\u201C", + 0x94:u"\u201D", + 0x95:u"\u2022", + 0x96:u"\u2013", + 0x97:u"\u2014", + 0x98:u"\u02DC", + 0x99:u"\u2122", + 0x9A:u"\u0161", + 0x9B:u"\u203A", + 0x9C:u"\u0153", + 0x9D:u"\u009D", + 0x9E:u"\u017E", + 0x9F:u"\u0178", +} + +encodings = { + '437': 'cp437', + '850': 'cp850', + '852': 'cp852', + '855': 'cp855', + '857': 'cp857', + '860': 'cp860', + '861': 'cp861', + '862': 'cp862', + '863': 'cp863', + '865': 'cp865', + '866': 'cp866', + '869': 'cp869', + 'ansix341968': 'ascii', + 'ansix341986': 'ascii', + 'arabic': 'iso8859-6', + 'ascii': 'ascii', + 'asmo708': 'iso8859-6', + 'big5': 'big5', + 'big5hkscs': 'big5hkscs', + 'chinese': 'gbk', + 'cp037': 'cp037', + 'cp1026': 'cp1026', + 'cp154': 'ptcp154', + 'cp367': 'ascii', + 'cp424': 'cp424', + 'cp437': 'cp437', + 'cp500': 'cp500', + 'cp775': 'cp775', + 'cp819': 'windows-1252', + 'cp850': 'cp850', + 'cp852': 'cp852', + 'cp855': 'cp855', + 'cp857': 'cp857', + 'cp860': 'cp860', + 'cp861': 'cp861', + 'cp862': 'cp862', + 'cp863': 'cp863', + 'cp864': 'cp864', + 'cp865': 'cp865', + 'cp866': 'cp866', + 'cp869': 'cp869', + 'cp936': 'gbk', + 'cpgr': 'cp869', + 'cpis': 'cp861', + 'csascii': 'ascii', + 'csbig5': 'big5', + 'cseuckr': 'cp949', + 'cseucpkdfmtjapanese': 'euc_jp', + 'csgb2312': 'gbk', + 'cshproman8': 'hp-roman8', + 'csibm037': 'cp037', + 'csibm1026': 'cp1026', + 'csibm424': 'cp424', + 'csibm500': 'cp500', + 'csibm855': 'cp855', + 'csibm857': 'cp857', + 'csibm860': 'cp860', + 'csibm861': 'cp861', + 'csibm863': 'cp863', + 'csibm864': 'cp864', + 'csibm865': 'cp865', + 'csibm866': 'cp866', + 'csibm869': 'cp869', + 'csiso2022jp': 'iso2022_jp', + 'csiso2022jp2': 'iso2022_jp_2', + 'csiso2022kr': 'iso2022_kr', + 'csiso58gb231280': 'gbk', + 'csisolatin1': 'windows-1252', + 'csisolatin2': 'iso8859-2', + 'csisolatin3': 'iso8859-3', + 'csisolatin4': 'iso8859-4', + 'csisolatin5': 'windows-1254', + 'csisolatin6': 'iso8859-10', + 'csisolatinarabic': 'iso8859-6', + 'csisolatincyrillic': 'iso8859-5', + 'csisolatingreek': 'iso8859-7', + 'csisolatinhebrew': 'iso8859-8', + 'cskoi8r': 'koi8-r', + 'csksc56011987': 'cp949', + 'cspc775baltic': 'cp775', + 'cspc850multilingual': 'cp850', + 'cspc862latinhebrew': 'cp862', + 'cspc8codepage437': 'cp437', + 'cspcp852': 'cp852', + 'csptcp154': 'ptcp154', + 'csshiftjis': 'shift_jis', + 'csunicode11utf7': 'utf-7', + 'cyrillic': 'iso8859-5', + 'cyrillicasian': 'ptcp154', + 'ebcdiccpbe': 'cp500', + 'ebcdiccpca': 'cp037', + 'ebcdiccpch': 'cp500', + 'ebcdiccphe': 'cp424', + 'ebcdiccpnl': 'cp037', + 'ebcdiccpus': 'cp037', + 'ebcdiccpwt': 'cp037', + 'ecma114': 'iso8859-6', + 'ecma118': 'iso8859-7', + 'elot928': 'iso8859-7', + 'eucjp': 'euc_jp', + 'euckr': 'cp949', + 'extendedunixcodepackedformatforjapanese': 'euc_jp', + 'gb18030': 'gb18030', + 'gb2312': 'gbk', + 'gb231280': 'gbk', + 'gbk': 'gbk', + 'greek': 'iso8859-7', + 'greek8': 'iso8859-7', + 'hebrew': 'iso8859-8', + 'hproman8': 'hp-roman8', + 'hzgb2312': 'hz', + 'ibm037': 'cp037', + 'ibm1026': 'cp1026', + 'ibm367': 'ascii', + 'ibm424': 'cp424', + 'ibm437': 'cp437', + 'ibm500': 'cp500', + 'ibm775': 'cp775', + 'ibm819': 'windows-1252', + 'ibm850': 'cp850', + 'ibm852': 'cp852', + 'ibm855': 'cp855', + 'ibm857': 'cp857', + 'ibm860': 'cp860', + 'ibm861': 'cp861', + 'ibm862': 'cp862', + 'ibm863': 'cp863', + 'ibm864': 'cp864', + 'ibm865': 'cp865', + 'ibm866': 'cp866', + 'ibm869': 'cp869', + 'iso2022jp': 'iso2022_jp', + 'iso2022jp2': 'iso2022_jp_2', + 'iso2022kr': 'iso2022_kr', + 'iso646irv1991': 'ascii', + 'iso646us': 'ascii', + 'iso88591': 'windows-1252', + 'iso885910': 'iso8859-10', + 'iso8859101992': 'iso8859-10', + 'iso885911987': 'windows-1252', + 'iso885913': 'iso8859-13', + 'iso885914': 'iso8859-14', + 'iso8859141998': 'iso8859-14', + 'iso885915': 'iso8859-15', + 'iso885916': 'iso8859-16', + 'iso8859162001': 'iso8859-16', + 'iso88592': 'iso8859-2', + 'iso885921987': 'iso8859-2', + 'iso88593': 'iso8859-3', + 'iso885931988': 'iso8859-3', + 'iso88594': 'iso8859-4', + 'iso885941988': 'iso8859-4', + 'iso88595': 'iso8859-5', + 'iso885951988': 'iso8859-5', + 'iso88596': 'iso8859-6', + 'iso885961987': 'iso8859-6', + 'iso88597': 'iso8859-7', + 'iso885971987': 'iso8859-7', + 'iso88598': 'iso8859-8', + 'iso885981988': 'iso8859-8', + 'iso88599': 'windows-1254', + 'iso885991989': 'windows-1254', + 'isoceltic': 'iso8859-14', + 'isoir100': 'windows-1252', + 'isoir101': 'iso8859-2', + 'isoir109': 'iso8859-3', + 'isoir110': 'iso8859-4', + 'isoir126': 'iso8859-7', + 'isoir127': 'iso8859-6', + 'isoir138': 'iso8859-8', + 'isoir144': 'iso8859-5', + 'isoir148': 'windows-1254', + 'isoir149': 'cp949', + 'isoir157': 'iso8859-10', + 'isoir199': 'iso8859-14', + 'isoir226': 'iso8859-16', + 'isoir58': 'gbk', + 'isoir6': 'ascii', + 'koi8r': 'koi8-r', + 'koi8u': 'koi8-u', + 'korean': 'cp949', + 'ksc5601': 'cp949', + 'ksc56011987': 'cp949', + 'ksc56011989': 'cp949', + 'l1': 'windows-1252', + 'l10': 'iso8859-16', + 'l2': 'iso8859-2', + 'l3': 'iso8859-3', + 'l4': 'iso8859-4', + 'l5': 'windows-1254', + 'l6': 'iso8859-10', + 'l8': 'iso8859-14', + 'latin1': 'windows-1252', + 'latin10': 'iso8859-16', + 'latin2': 'iso8859-2', + 'latin3': 'iso8859-3', + 'latin4': 'iso8859-4', + 'latin5': 'windows-1254', + 'latin6': 'iso8859-10', + 'latin8': 'iso8859-14', + 'latin9': 'iso8859-15', + 'ms936': 'gbk', + 'mskanji': 'shift_jis', + 'pt154': 'ptcp154', + 'ptcp154': 'ptcp154', + 'r8': 'hp-roman8', + 'roman8': 'hp-roman8', + 'shiftjis': 'shift_jis', + 'tis620': 'cp874', + 'unicode11utf7': 'utf-7', + 'us': 'ascii', + 'usascii': 'ascii', + 'utf16': 'utf-16', + 'utf16be': 'utf-16-be', + 'utf16le': 'utf-16-le', + 'utf8': 'utf-8', + 'windows1250': 'cp1250', + 'windows1251': 'cp1251', + 'windows1252': 'cp1252', + 'windows1253': 'cp1253', + 'windows1254': 'cp1254', + 'windows1255': 'cp1255', + 'windows1256': 'cp1256', + 'windows1257': 'cp1257', + 'windows1258': 'cp1258', + 'windows936': 'gbk', + 'x-x-big5': 'big5'} + +tokenTypes = { + "Doctype":0, + "Characters":1, + "SpaceCharacters":2, + "StartTag":3, + "EndTag":4, + "EmptyTag":5, + "Comment":6, + "ParseError":7 +} + +tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], + tokenTypes["EmptyTag"])) + + +prefixes = dict([(v,k) for k,v in namespaces.iteritems()]) +prefixes["http://www.w3.org/1998/Math/MathML"] = "math" + +class DataLossWarning(UserWarning): + pass + +class ReparseException(Exception): + pass diff --git a/html5lib/filters/__init__.py b/html5lib/filters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/html5lib/filters/_base.py b/html5lib/filters/_base.py new file mode 100644 index 00000000..bca94ada --- /dev/null +++ b/html5lib/filters/_base.py @@ -0,0 +1,10 @@ + +class Filter(object): + def __init__(self, source): + self.source = source + + def __iter__(self): + return iter(self.source) + + def __getattr__(self, name): + return getattr(self.source, name) diff --git a/html5lib/filters/formfiller.py b/html5lib/filters/formfiller.py new file mode 100644 index 00000000..94001714 --- /dev/null +++ b/html5lib/filters/formfiller.py @@ -0,0 +1,127 @@ +# +# The goal is to finally have a form filler where you pass data for +# each form, using the algorithm for "Seeding a form with initial values" +# See http://www.whatwg.org/specs/web-forms/current-work/#seeding +# + +import _base + +from html5lib.constants import spaceCharacters +spaceCharacters = u"".join(spaceCharacters) + +class SimpleFilter(_base.Filter): + def __init__(self, source, fieldStorage): + _base.Filter.__init__(self, source) + self.fieldStorage = fieldStorage + + def __iter__(self): + field_indices = {} + state = None + field_name = None + for token in _base.Filter.__iter__(self): + type = token["type"] + if type in ("StartTag", "EmptyTag"): + name = token["name"].lower() + if name == "input": + field_name = None + field_type = None + input_value_index = -1 + input_checked_index = -1 + for i,(n,v) in enumerate(token["data"]): + n = n.lower() + if n == u"name": + field_name = v.strip(spaceCharacters) + elif n == u"type": + field_type = v.strip(spaceCharacters) + elif n == u"checked": + input_checked_index = i + elif n == u"value": + input_value_index = i + + value_list = self.fieldStorage.getlist(field_name) + field_index = field_indices.setdefault(field_name, 0) + if field_index < len(value_list): + value = value_list[field_index] + else: + value = "" + + if field_type in (u"checkbox", u"radio"): + if value_list: + if token["data"][input_value_index][1] == value: + if input_checked_index < 0: + token["data"].append((u"checked", u"")) + field_indices[field_name] = field_index + 1 + elif input_checked_index >= 0: + del token["data"][input_checked_index] + + elif field_type not in (u"button", u"submit", u"reset"): + if input_value_index >= 0: + token["data"][input_value_index] = (u"value", value) + else: + token["data"].append((u"value", value)) + field_indices[field_name] = field_index + 1 + + field_type = None + field_name = None + + elif name == "textarea": + field_type = "textarea" + field_name = dict((token["data"])[::-1])["name"] + + elif name == "select": + field_type = "select" + attributes = dict(token["data"][::-1]) + field_name = attributes.get("name") + is_select_multiple = "multiple" in attributes + is_selected_option_found = False + + elif field_type == "select" and field_name and name == "option": + option_selected_index = -1 + option_value = None + for i,(n,v) in enumerate(token["data"]): + n = n.lower() + if n == "selected": + option_selected_index = i + elif n == "value": + option_value = v.strip(spaceCharacters) + if option_value is None: + raise NotImplementedError("
      +#errors +Line: 1 Col: 9 Unexpected end tag (strong). Expected DOCTYPE. +Line: 1 Col: 9 Unexpected end tag (strong) after the (implied) root element. +Line: 1 Col: 13 Unexpected end tag (b) after the (implied) root element. +Line: 1 Col: 18 Unexpected end tag (em) after the (implied) root element. +Line: 1 Col: 22 Unexpected end tag (i) after the (implied) root element. +Line: 1 Col: 26 Unexpected end tag (u) after the (implied) root element. +Line: 1 Col: 35 Unexpected end tag (strike) after the (implied) root element. +Line: 1 Col: 39 Unexpected end tag (s) after the (implied) root element. +Line: 1 Col: 47 Unexpected end tag (blink) after the (implied) root element. +Line: 1 Col: 52 Unexpected end tag (tt) after the (implied) root element. +Line: 1 Col: 58 Unexpected end tag (pre) after the (implied) root element. +Line: 1 Col: 64 Unexpected end tag (big) after the (implied) root element. +Line: 1 Col: 72 Unexpected end tag (small) after the (implied) root element. +Line: 1 Col: 79 Unexpected end tag (font) after the (implied) root element. +Line: 1 Col: 88 Unexpected end tag (select) after the (implied) root element. +Line: 1 Col: 93 Unexpected end tag (h1) after the (implied) root element. +Line: 1 Col: 98 Unexpected end tag (h2) after the (implied) root element. +Line: 1 Col: 103 Unexpected end tag (h3) after the (implied) root element. +Line: 1 Col: 108 Unexpected end tag (h4) after the (implied) root element. +Line: 1 Col: 113 Unexpected end tag (h5) after the (implied) root element. +Line: 1 Col: 118 Unexpected end tag (h6) after the (implied) root element. +Line: 1 Col: 125 Unexpected end tag (body) after the (implied) root element. +Line: 1 Col: 130 Unexpected end tag (br). Treated as br element. +Line: 1 Col: 134 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 140 This element (img) has no end tag. +Line: 1 Col: 148 Unexpected end tag (title). Ignored. +Line: 1 Col: 155 Unexpected end tag (span). Ignored. +Line: 1 Col: 163 Unexpected end tag (style). Ignored. +Line: 1 Col: 172 Unexpected end tag (script). Ignored. +Line: 1 Col: 180 Unexpected end tag (table). Ignored. +Line: 1 Col: 185 Unexpected end tag (th). Ignored. +Line: 1 Col: 190 Unexpected end tag (td). Ignored. +Line: 1 Col: 195 Unexpected end tag (tr). Ignored. +Line: 1 Col: 203 This element (frame) has no end tag. +Line: 1 Col: 210 This element (area) has no end tag. +Line: 1 Col: 217 Unexpected end tag (link). Ignored. +Line: 1 Col: 225 This element (param) has no end tag. +Line: 1 Col: 230 This element (hr) has no end tag. +Line: 1 Col: 238 This element (input) has no end tag. +Line: 1 Col: 244 Unexpected end tag (col). Ignored. +Line: 1 Col: 251 Unexpected end tag (base). Ignored. +Line: 1 Col: 258 Unexpected end tag (meta). Ignored. +Line: 1 Col: 269 This element (basefont) has no end tag. +Line: 1 Col: 279 This element (bgsound) has no end tag. +Line: 1 Col: 287 This element (embed) has no end tag. +Line: 1 Col: 296 This element (spacer) has no end tag. +Line: 1 Col: 300 Unexpected end tag (p). Ignored. +Line: 1 Col: 305 End tag (dd) seen too early. Expected other end tag. +Line: 1 Col: 310 End tag (dt) seen too early. Expected other end tag. +Line: 1 Col: 320 Unexpected end tag (caption). Ignored. +Line: 1 Col: 331 Unexpected end tag (colgroup). Ignored. +Line: 1 Col: 339 Unexpected end tag (tbody). Ignored. +Line: 1 Col: 347 Unexpected end tag (tfoot). Ignored. +Line: 1 Col: 355 Unexpected end tag (thead). Ignored. +Line: 1 Col: 365 End tag (address) seen too early. Expected other end tag. +Line: 1 Col: 378 End tag (blockquote) seen too early. Expected other end tag. +Line: 1 Col: 387 End tag (center) seen too early. Expected other end tag. +Line: 1 Col: 393 Unexpected end tag (dir). Ignored. +Line: 1 Col: 399 End tag (div) seen too early. Expected other end tag. +Line: 1 Col: 404 End tag (dl) seen too early. Expected other end tag. +Line: 1 Col: 415 End tag (fieldset) seen too early. Expected other end tag. +Line: 1 Col: 425 End tag (listing) seen too early. Expected other end tag. +Line: 1 Col: 432 End tag (menu) seen too early. Expected other end tag. +Line: 1 Col: 437 End tag (ol) seen too early. Expected other end tag. +Line: 1 Col: 442 End tag (ul) seen too early. Expected other end tag. +Line: 1 Col: 447 End tag (li) seen too early. Expected other end tag. +Line: 1 Col: 454 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 460 This element (wbr) has no end tag. +Line: 1 Col: 476 End tag (button) seen too early. Expected other end tag. +Line: 1 Col: 486 End tag (marquee) seen too early. Expected other end tag. +Line: 1 Col: 495 End tag (object) seen too early. Expected other end tag. +Line: 1 Col: 513 Unexpected end tag (html). Ignored. +Line: 1 Col: 513 Unexpected end tag (frameset). Ignored. +Line: 1 Col: 520 Unexpected end tag (head). Ignored. +Line: 1 Col: 529 Unexpected end tag (iframe). Ignored. +Line: 1 Col: 537 This element (image) has no end tag. +Line: 1 Col: 547 This element (isindex) has no end tag. +Line: 1 Col: 557 Unexpected end tag (noembed). Ignored. +Line: 1 Col: 568 Unexpected end tag (noframes). Ignored. +Line: 1 Col: 579 Unexpected end tag (noscript). Ignored. +Line: 1 Col: 590 Unexpected end tag (optgroup). Ignored. +Line: 1 Col: 599 Unexpected end tag (option). Ignored. +Line: 1 Col: 611 Unexpected end tag (plaintext). Ignored. +Line: 1 Col: 622 Unexpected end tag (textarea). Ignored. +#document +| +| +| +|
      +|

      + +#data +

      +#errors +Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE. +Line: 1 Col: 20 Unexpected end tag (strong) in table context caused voodoo mode. +Line: 1 Col: 20 End tag (strong) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 24 Unexpected end tag (b) in table context caused voodoo mode. +Line: 1 Col: 24 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 29 Unexpected end tag (em) in table context caused voodoo mode. +Line: 1 Col: 29 End tag (em) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 33 Unexpected end tag (i) in table context caused voodoo mode. +Line: 1 Col: 33 End tag (i) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 37 Unexpected end tag (u) in table context caused voodoo mode. +Line: 1 Col: 37 End tag (u) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 46 Unexpected end tag (strike) in table context caused voodoo mode. +Line: 1 Col: 46 End tag (strike) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 50 Unexpected end tag (s) in table context caused voodoo mode. +Line: 1 Col: 50 End tag (s) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 58 Unexpected end tag (blink) in table context caused voodoo mode. +Line: 1 Col: 58 Unexpected end tag (blink). Ignored. +Line: 1 Col: 63 Unexpected end tag (tt) in table context caused voodoo mode. +Line: 1 Col: 63 End tag (tt) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 69 Unexpected end tag (pre) in table context caused voodoo mode. +Line: 1 Col: 69 End tag (pre) seen too early. Expected other end tag. +Line: 1 Col: 75 Unexpected end tag (big) in table context caused voodoo mode. +Line: 1 Col: 75 End tag (big) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 83 Unexpected end tag (small) in table context caused voodoo mode. +Line: 1 Col: 83 End tag (small) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 90 Unexpected end tag (font) in table context caused voodoo mode. +Line: 1 Col: 90 End tag (font) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 99 Unexpected end tag (select) in table context caused voodoo mode. +Line: 1 Col: 99 Unexpected end tag (select). Ignored. +Line: 1 Col: 104 Unexpected end tag (h1) in table context caused voodoo mode. +Line: 1 Col: 104 End tag (h1) seen too early. Expected other end tag. +Line: 1 Col: 109 Unexpected end tag (h2) in table context caused voodoo mode. +Line: 1 Col: 109 End tag (h2) seen too early. Expected other end tag. +Line: 1 Col: 114 Unexpected end tag (h3) in table context caused voodoo mode. +Line: 1 Col: 114 End tag (h3) seen too early. Expected other end tag. +Line: 1 Col: 119 Unexpected end tag (h4) in table context caused voodoo mode. +Line: 1 Col: 119 End tag (h4) seen too early. Expected other end tag. +Line: 1 Col: 124 Unexpected end tag (h5) in table context caused voodoo mode. +Line: 1 Col: 124 End tag (h5) seen too early. Expected other end tag. +Line: 1 Col: 129 Unexpected end tag (h6) in table context caused voodoo mode. +Line: 1 Col: 129 End tag (h6) seen too early. Expected other end tag. +Line: 1 Col: 136 Unexpected end tag (body) in the table row phase. Ignored. +Line: 1 Col: 141 Unexpected end tag (br) in table context caused voodoo mode. +Line: 1 Col: 141 Unexpected end tag (br). Treated as br element. +Line: 1 Col: 145 Unexpected end tag (a) in table context caused voodoo mode. +Line: 1 Col: 145 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 151 Unexpected end tag (img) in table context caused voodoo mode. +Line: 1 Col: 151 This element (img) has no end tag. +Line: 1 Col: 159 Unexpected end tag (title) in table context caused voodoo mode. +Line: 1 Col: 159 Unexpected end tag (title). Ignored. +Line: 1 Col: 166 Unexpected end tag (span) in table context caused voodoo mode. +Line: 1 Col: 166 Unexpected end tag (span). Ignored. +Line: 1 Col: 174 Unexpected end tag (style) in table context caused voodoo mode. +Line: 1 Col: 174 Unexpected end tag (style). Ignored. +Line: 1 Col: 183 Unexpected end tag (script) in table context caused voodoo mode. +Line: 1 Col: 183 Unexpected end tag (script). Ignored. +Line: 1 Col: 196 Unexpected end tag (th). Ignored. +Line: 1 Col: 201 Unexpected end tag (td). Ignored. +Line: 1 Col: 206 Unexpected end tag (tr). Ignored. +Line: 1 Col: 214 This element (frame) has no end tag. +Line: 1 Col: 221 This element (area) has no end tag. +Line: 1 Col: 228 Unexpected end tag (link). Ignored. +Line: 1 Col: 236 This element (param) has no end tag. +Line: 1 Col: 241 This element (hr) has no end tag. +Line: 1 Col: 249 This element (input) has no end tag. +Line: 1 Col: 255 Unexpected end tag (col). Ignored. +Line: 1 Col: 262 Unexpected end tag (base). Ignored. +Line: 1 Col: 269 Unexpected end tag (meta). Ignored. +Line: 1 Col: 280 This element (basefont) has no end tag. +Line: 1 Col: 290 This element (bgsound) has no end tag. +Line: 1 Col: 298 This element (embed) has no end tag. +Line: 1 Col: 307 This element (spacer) has no end tag. +Line: 1 Col: 311 Unexpected end tag (p). Ignored. +Line: 1 Col: 316 End tag (dd) seen too early. Expected other end tag. +Line: 1 Col: 321 End tag (dt) seen too early. Expected other end tag. +Line: 1 Col: 331 Unexpected end tag (caption). Ignored. +Line: 1 Col: 342 Unexpected end tag (colgroup). Ignored. +Line: 1 Col: 350 Unexpected end tag (tbody). Ignored. +Line: 1 Col: 358 Unexpected end tag (tfoot). Ignored. +Line: 1 Col: 366 Unexpected end tag (thead). Ignored. +Line: 1 Col: 376 End tag (address) seen too early. Expected other end tag. +Line: 1 Col: 389 End tag (blockquote) seen too early. Expected other end tag. +Line: 1 Col: 398 End tag (center) seen too early. Expected other end tag. +Line: 1 Col: 404 Unexpected end tag (dir). Ignored. +Line: 1 Col: 410 End tag (div) seen too early. Expected other end tag. +Line: 1 Col: 415 End tag (dl) seen too early. Expected other end tag. +Line: 1 Col: 426 End tag (fieldset) seen too early. Expected other end tag. +Line: 1 Col: 436 End tag (listing) seen too early. Expected other end tag. +Line: 1 Col: 443 End tag (menu) seen too early. Expected other end tag. +Line: 1 Col: 448 End tag (ol) seen too early. Expected other end tag. +Line: 1 Col: 453 End tag (ul) seen too early. Expected other end tag. +Line: 1 Col: 458 End tag (li) seen too early. Expected other end tag. +Line: 1 Col: 465 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm. +Line: 1 Col: 471 This element (wbr) has no end tag. +Line: 1 Col: 487 End tag (button) seen too early. Expected other end tag. +Line: 1 Col: 497 End tag (marquee) seen too early. Expected other end tag. +Line: 1 Col: 506 End tag (object) seen too early. Expected other end tag. +Line: 1 Col: 524 Unexpected end tag (html). Ignored. +Line: 1 Col: 524 Unexpected end tag (frameset). Ignored. +Line: 1 Col: 531 Unexpected end tag (head). Ignored. +Line: 1 Col: 540 Unexpected end tag (iframe). Ignored. +Line: 1 Col: 548 This element (image) has no end tag. +Line: 1 Col: 558 This element (isindex) has no end tag. +Line: 1 Col: 568 Unexpected end tag (noembed). Ignored. +Line: 1 Col: 579 Unexpected end tag (noframes). Ignored. +Line: 1 Col: 590 Unexpected end tag (noscript). Ignored. +Line: 1 Col: 601 Unexpected end tag (optgroup). Ignored. +Line: 1 Col: 610 Unexpected end tag (option). Ignored. +Line: 1 Col: 622 Unexpected end tag (plaintext). Ignored. +Line: 1 Col: 633 Unexpected end tag (textarea). Ignored. +#document +| +| +| +|
      +| +| +| +|

      + +#data + +#errors +Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE. +Line: 1 Col: 10 Expected closing tag. Unexpected end of file. +#document +| +| +| diff --git a/html5lib/tests/testdata/tree-construction/tests10.dat b/html5lib/tests/testdata/tree-construction/tests10.dat new file mode 100644 index 00000000..4f8df86f --- /dev/null +++ b/html5lib/tests/testdata/tree-construction/tests10.dat @@ -0,0 +1,799 @@ +#data + +#errors +#document +| +| +| +| +| + +#data +a +#errors +29: Bogus comment +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| + +#data + +#errors +35: Stray “svg” start tag. +42: Stray end tag “svg” +#document +| +| +| +| +| +#errors +43: Stray “svg” start tag. +50: Stray end tag “svg” +#document +| +| +| +| +|

      +#errors +34: Start tag “svg” seen in “table”. +41: Stray end tag “svg”. +#document +| +| +| +| +| +| + +#data +
      foo
      +#errors +34: Start tag “svg” seen in “table”. +46: Stray end tag “g”. +53: Stray end tag “svg”. +#document +| +| +| +| +| +| +| "foo" +| + +#data +
      foobar
      +#errors +34: Start tag “svg” seen in “table”. +46: Stray end tag “g”. +58: Stray end tag “g”. +65: Stray end tag “svg”. +#document +| +| +| +| +| +| +| "foo" +| +| "bar" +| + +#data +
      foobar
      +#errors +41: Start tag “svg” seen in “table”. +53: Stray end tag “g”. +65: Stray end tag “g”. +72: Stray end tag “svg”. +#document +| +| +| +| +| +| +| "foo" +| +| "bar" +| +| + +#data +
      foobar
      +#errors +45: Start tag “svg” seen in “table”. +57: Stray end tag “g”. +69: Stray end tag “g”. +76: Stray end tag “svg”. +#document +| +| +| +| +| +| +| "foo" +| +| "bar" +| +| +| + +#data +
      foobar
      +#errors +#document +| +| +| +| +| +| +| +|
      +| +| +| "foo" +| +| "bar" + +#data +
      foobar

      baz

      +#errors +#document +| +| +| +| +| +| +| +|
      +| +| +| "foo" +| +| "bar" +|

      +| "baz" + +#data +
      foobar

      baz

      +#errors +#document +| +| +| +| +| +|
      +| +| +| "foo" +| +| "bar" +|

      +| "baz" + +#data +
      foobar

      baz

      quux +#errors +70: HTML start tag “p” in a foreign namespace context. +81: “table” closed but “caption” was still open. +#document +| +| +| +| +| +|
      +| +| +| "foo" +| +| "bar" +|

      +| "baz" +|

      +| "quux" + +#data +
      foobarbaz

      quux +#errors +78: “table” closed but “caption” was still open. +78: Unclosed elements on stack. +#document +| +| +| +| +| +|
      +| +| +| "foo" +| +| "bar" +| "baz" +|

      +| "quux" + +#data +foobar

      baz

      quux +#errors +44: Start tag “svg” seen in “table”. +56: Stray end tag “g”. +68: Stray end tag “g”. +71: HTML start tag “p” in a foreign namespace context. +71: Start tag “p” seen in “table”. +#document +| +| +| +| +| +| +| "foo" +| +| "bar" +|

      +| "baz" +| +| +|

      +| "quux" + +#data +

      quux +#errors +50: Stray “svg” start tag. +54: Stray “g” start tag. +62: Stray end tag “g” +66: Stray “g” start tag. +74: Stray end tag “g” +77: Stray “p” start tag. +88: “table” end tag with “select” open. +#document +| +| +| +| +| +| +| +|
      +|

      quux +#errors +36: Start tag “select” seen in “table”. +42: Stray “svg” start tag. +46: Stray “g” start tag. +54: Stray end tag “g” +58: Stray “g” start tag. +66: Stray end tag “g” +69: Stray “p” start tag. +80: “table” end tag with “select” open. +#document +| +| +| +| +| +|

      +| "quux" + +#data +foobar

      baz +#errors +41: Stray “svg” start tag. +68: HTML start tag “p” in a foreign namespace context. +#document +| +| +| +| +| +| +| "foo" +| +| "bar" +|

      +| "baz" + +#data +foobar

      baz +#errors +34: Stray “svg” start tag. +61: HTML start tag “p” in a foreign namespace context. +#document +| +| +| +| +| +| +| "foo" +| +| "bar" +|

      +| "baz" + +#data +

      +#errors +31: Stray “svg” start tag. +35: Stray “g” start tag. +40: Stray end tag “g” +44: Stray “g” start tag. +49: Stray end tag “g” +52: Stray “p” start tag. +58: Stray “span” start tag. +58: End of file seen and there were open elements. +#document +| +| +| +| + +#data +

      +#errors +42: Stray “svg” start tag. +46: Stray “g” start tag. +51: Stray end tag “g” +55: Stray “g” start tag. +60: Stray end tag “g” +63: Stray “p” start tag. +69: Stray “span” start tag. +#document +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| xlink:href="foo" +| +| xlink href="foo" + +#data + +#errors +#document +| +| +| +| +| xlink:href="foo" +| xml:lang="en" +| +| +| xlink href="foo" +| xml lang="en" + +#data + +#errors +#document +| +| +| +| +| xlink:href="foo" +| xml:lang="en" +| +| +| xlink href="foo" +| xml lang="en" + +#data +bar +#errors +#document +| +| +| +| +| xlink:href="foo" +| xml:lang="en" +| +| +| xlink href="foo" +| xml lang="en" +| "bar" + +#data + +#errors +#document +| +| +| +| + +#data +

      a +#errors +#document +| +| +| +|
      +| +| "a" + +#data +
      a +#errors +#document +| +| +| +|
      +| +| +| "a" + +#data +
      +#errors +#document +| +| +| +|
      +| +| +| + +#data +
      a +#errors +#document +| +| +| +|
      +| +| +| +| +| "a" + +#data +

      a +#errors +#document +| +| +| +|

      +| +| +| +|

      +| "a" + +#data +
        a +#errors +40: HTML start tag “ul” in a foreign namespace context. +41: End of file in a foreign namespace context. +#document +| +| +| +| +| +| +|
        +| +|
          +| "a" + +#data +
            a +#errors +35: HTML start tag “ul” in a foreign namespace context. +36: End of file in a foreign namespace context. +#document +| +| +| +| +| +| +| +|
              +| "a" + +#data +

              +#errors +#document +| +| +| +| +|

              +| +| +|

              + +#data +

              +#errors +#document +| +| +| +| +|

              +| +| +|

              + +#data +

              +#errors +#document +| +| +| +|

              +| +| +| +|

              +|

              + +#data +
              +#errors +#document +| +| +| +| +| +|
              +| +|
              +| +| + +#data +
              +#errors +#document +| +| +| +| +| +| +| +|
              +|
              +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data +

      +#errors +#document +| +| +| +| +|
      +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| +| + +#data +
      +#errors +#document +| +| +| +| +| +| +| +|
      +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| +| +| +| +| +| +| +| +| diff --git a/html5lib/tests/testdata/tree-construction/tests11.dat b/html5lib/tests/testdata/tree-construction/tests11.dat new file mode 100644 index 00000000..638cde47 --- /dev/null +++ b/html5lib/tests/testdata/tree-construction/tests11.dat @@ -0,0 +1,482 @@ +#data + +#errors +#document +| +| +| +| +| +| attributeName="" +| attributeType="" +| baseFrequency="" +| baseProfile="" +| calcMode="" +| clipPathUnits="" +| contentScriptType="" +| contentStyleType="" +| diffuseConstant="" +| edgeMode="" +| externalResourcesRequired="" +| filterRes="" +| filterUnits="" +| glyphRef="" +| gradientTransform="" +| gradientUnits="" +| kernelMatrix="" +| kernelUnitLength="" +| keyPoints="" +| keySplines="" +| keyTimes="" +| lengthAdjust="" +| limitingConeAngle="" +| markerHeight="" +| markerUnits="" +| markerWidth="" +| maskContentUnits="" +| maskUnits="" +| numOctaves="" +| pathLength="" +| patternContentUnits="" +| patternTransform="" +| patternUnits="" +| pointsAtX="" +| pointsAtY="" +| pointsAtZ="" +| preserveAlpha="" +| preserveAspectRatio="" +| primitiveUnits="" +| refX="" +| refY="" +| repeatCount="" +| repeatDur="" +| requiredExtensions="" +| requiredFeatures="" +| specularConstant="" +| specularExponent="" +| spreadMethod="" +| startOffset="" +| stdDeviation="" +| stitchTiles="" +| surfaceScale="" +| systemLanguage="" +| tableValues="" +| targetX="" +| targetY="" +| textLength="" +| viewBox="" +| viewTarget="" +| xChannelSelector="" +| yChannelSelector="" +| zoomAndPan="" + +#data + +#errors +#document +| +| +| +| +| +| attributeName="" +| attributeType="" +| baseFrequency="" +| baseProfile="" +| calcMode="" +| clipPathUnits="" +| contentScriptType="" +| contentStyleType="" +| diffuseConstant="" +| edgeMode="" +| externalResourcesRequired="" +| filterRes="" +| filterUnits="" +| glyphRef="" +| gradientTransform="" +| gradientUnits="" +| kernelMatrix="" +| kernelUnitLength="" +| keyPoints="" +| keySplines="" +| keyTimes="" +| lengthAdjust="" +| limitingConeAngle="" +| markerHeight="" +| markerUnits="" +| markerWidth="" +| maskContentUnits="" +| maskUnits="" +| numOctaves="" +| pathLength="" +| patternContentUnits="" +| patternTransform="" +| patternUnits="" +| pointsAtX="" +| pointsAtY="" +| pointsAtZ="" +| preserveAlpha="" +| preserveAspectRatio="" +| primitiveUnits="" +| refX="" +| refY="" +| repeatCount="" +| repeatDur="" +| requiredExtensions="" +| requiredFeatures="" +| specularConstant="" +| specularExponent="" +| spreadMethod="" +| startOffset="" +| stdDeviation="" +| stitchTiles="" +| surfaceScale="" +| systemLanguage="" +| tableValues="" +| targetX="" +| targetY="" +| textLength="" +| viewBox="" +| viewTarget="" +| xChannelSelector="" +| yChannelSelector="" +| zoomAndPan="" + +#data + +#errors +#document +| +| +| +| +| +| attributeName="" +| attributeType="" +| baseFrequency="" +| baseProfile="" +| calcMode="" +| clipPathUnits="" +| contentScriptType="" +| contentStyleType="" +| diffuseConstant="" +| edgeMode="" +| externalResourcesRequired="" +| filterRes="" +| filterUnits="" +| glyphRef="" +| gradientTransform="" +| gradientUnits="" +| kernelMatrix="" +| kernelUnitLength="" +| keyPoints="" +| keySplines="" +| keyTimes="" +| lengthAdjust="" +| limitingConeAngle="" +| markerHeight="" +| markerUnits="" +| markerWidth="" +| maskContentUnits="" +| maskUnits="" +| numOctaves="" +| pathLength="" +| patternContentUnits="" +| patternTransform="" +| patternUnits="" +| pointsAtX="" +| pointsAtY="" +| pointsAtZ="" +| preserveAlpha="" +| preserveAspectRatio="" +| primitiveUnits="" +| refX="" +| refY="" +| repeatCount="" +| repeatDur="" +| requiredExtensions="" +| requiredFeatures="" +| specularConstant="" +| specularExponent="" +| spreadMethod="" +| startOffset="" +| stdDeviation="" +| stitchTiles="" +| surfaceScale="" +| systemLanguage="" +| tableValues="" +| targetX="" +| targetY="" +| textLength="" +| viewBox="" +| viewTarget="" +| xChannelSelector="" +| yChannelSelector="" +| zoomAndPan="" + +#data + +#errors +#document +| +| +| +| +| +| attributename="" +| attributetype="" +| basefrequency="" +| baseprofile="" +| calcmode="" +| clippathunits="" +| contentscripttype="" +| contentstyletype="" +| diffuseconstant="" +| edgemode="" +| externalresourcesrequired="" +| filterres="" +| filterunits="" +| glyphref="" +| gradienttransform="" +| gradientunits="" +| kernelmatrix="" +| kernelunitlength="" +| keypoints="" +| keysplines="" +| keytimes="" +| lengthadjust="" +| limitingconeangle="" +| markerheight="" +| markerunits="" +| markerwidth="" +| maskcontentunits="" +| maskunits="" +| numoctaves="" +| pathlength="" +| patterncontentunits="" +| patterntransform="" +| patternunits="" +| pointsatx="" +| pointsaty="" +| pointsatz="" +| preservealpha="" +| preserveaspectratio="" +| primitiveunits="" +| refx="" +| refy="" +| repeatcount="" +| repeatdur="" +| requiredextensions="" +| requiredfeatures="" +| specularconstant="" +| specularexponent="" +| spreadmethod="" +| startoffset="" +| stddeviation="" +| stitchtiles="" +| surfacescale="" +| systemlanguage="" +| tablevalues="" +| targetx="" +| targety="" +| textlength="" +| viewbox="" +| viewtarget="" +| xchannelselector="" +| ychannelselector="" +| zoomandpan="" + +#data + +#errors +#document +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| diff --git a/html5lib/tests/testdata/tree-construction/tests12.dat b/html5lib/tests/testdata/tree-construction/tests12.dat new file mode 100644 index 00000000..63107d27 --- /dev/null +++ b/html5lib/tests/testdata/tree-construction/tests12.dat @@ -0,0 +1,62 @@ +#data +

      foobazeggs

      spam

      quuxbar +#errors +#document +| +| +| +| +|

      +| "foo" +| +| +| +| "baz" +| +| +| +| +| "eggs" +| +| +|

      +| "spam" +| +| +| +|
      +| +| +| "quux" +| "bar" + +#data +foobazeggs

      spam
      quuxbar +#errors +#document +| +| +| +| +| "foo" +| +| +| +| "baz" +| +| +| +| +| "eggs" +| +| +|

      +| "spam" +| +| +| +|
      +| +| +| "quux" +| "bar" diff --git a/html5lib/tests/testdata/tree-construction/tests14.dat b/html5lib/tests/testdata/tree-construction/tests14.dat new file mode 100644 index 00000000..b8713f88 --- /dev/null +++ b/html5lib/tests/testdata/tree-construction/tests14.dat @@ -0,0 +1,74 @@ +#data + +#errors +#document +| +| +| +| +| + +#data + +#errors +#document +| +| +| +| +| +| + +#data + +#errors +15: Unexpected start tag html +#document +| +| +| abc:def="gh" +| +| +| + +#data + +#errors +15: Unexpected start tag html +#document +| +| +| xml:lang="bar" +| +| + +#data + +#errors +#document +| +| +| 123="456" +| +| + +#data + +#errors +#document +| +| +| 123="456" +| 789="012" +| +| + +#data + +#errors +#document +| +| +| +| +| 789="012" diff --git a/html5lib/tests/testdata/tree-construction/tests15.dat b/html5lib/tests/testdata/tree-construction/tests15.dat new file mode 100644 index 00000000..6ce1c0d1 --- /dev/null +++ b/html5lib/tests/testdata/tree-construction/tests15.dat @@ -0,0 +1,208 @@ +#data +

      X +#errors +Line: 1 Col: 31 Unexpected end tag (p). Ignored. +Line: 1 Col: 36 Expected closing tag. Unexpected end of file. +#document +| +| +| +| +|

      +| +| +| +| +| +| +| " " +|

      +| "X" + +#data +

      +

      X +#errors +Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE. +Line: 1 Col: 16 Unexpected end tag (p). Ignored. +Line: 2 Col: 4 Expected closing tag. Unexpected end of file. +#document +| +| +| +|

      +| +| +| +| +| +| +| " +" +|

      +| "X" + +#data + +#errors +Line: 1 Col: 22 Unexpected end tag (html) after the (implied) root element. +#document +| +| +| +| +| " " + +#data + +#errors +Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element. +#document +| +| +| +| +| + +#data + +#errors +Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE. +Line: 1 Col: 13 Unexpected end tag (html) after the (implied) root element. +#document +| +| +| +| + +#data +X +#errors +Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element. +#document +| +| +| +| +| +| "X" + +#data +<!doctype html><table> X<meta></table> +#errors +Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode. +Line: 1 Col: 30 Unexpected start tag (meta) in table context caused voodoo mode. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| " X" +| <meta> +| <table> + +#data +<!doctype html><table> x</table> +#errors +Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| " x" +| <table> + +#data +<!doctype html><table> x </table> +#errors +Line: 1 Col: 25 Unexpected non-space characters in table context caused voodoo mode. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| " x " +| <table> + +#data +<!doctype html><table><tr> x</table> +#errors +Line: 1 Col: 28 Unexpected non-space characters in table context caused voodoo mode. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| " x" +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><table>X<style> <tr>x </style> </table> +#errors +Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "X" +| <table> +| <style> +| " <tr>x " +| " " + +#data +<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div> +#errors +Line: 1 Col: 30 Unexpected start tag (a) in table context caused voodoo mode. +Line: 1 Col: 37 Unexpected end tag (a) in table context caused voodoo mode. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <div> +| <a> +| "foo" +| <table> +| " " +| <tbody> +| <tr> +| <td> +| "bar" +| " " + +#data +<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes> +#errors +6: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”. +13: Stray start tag “frame”. +21: Stray end tag “frame”. +29: Stray end tag “frame”. +39: “frameset” start tag after “body” already open. +105: End of file seen inside an [R]CDATA element. +105: End of file seen and there were open elements. +XXX: These errors are wrong, please fix me! +#document +| <html> +| <head> +| <frameset> +| <frame> +| <frameset> +| <frame> +| <noframes> +| "</frameset><noframes>" + +#data +<!DOCTYPE html><object></html> +#errors +1: Expected closing tag. Unexpected end of file +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <object> diff --git a/html5lib/tests/testdata/tree-construction/tests16.dat b/html5lib/tests/testdata/tree-construction/tests16.dat new file mode 100644 index 00000000..c8ef66f0 --- /dev/null +++ b/html5lib/tests/testdata/tree-construction/tests16.dat @@ -0,0 +1,2299 @@ +#data +<!doctype html><script> +#errors +Line: 1 Col: 23 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| <body> + +#data +<!doctype html><script>a +#errors +Line: 1 Col: 24 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "a" +| <body> + +#data +<!doctype html><script>< +#errors +Line: 1 Col: 24 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<" +| <body> + +#data +<!doctype html><script></ +#errors +Line: 1 Col: 25 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</" +| <body> + +#data +<!doctype html><script></S +#errors +Line: 1 Col: 26 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</S" +| <body> + +#data +<!doctype html><script></SC +#errors +Line: 1 Col: 27 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</SC" +| <body> + +#data +<!doctype html><script></SCR +#errors +Line: 1 Col: 28 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</SCR" +| <body> + +#data +<!doctype html><script></SCRI +#errors +Line: 1 Col: 29 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</SCRI" +| <body> + +#data +<!doctype html><script></SCRIP +#errors +Line: 1 Col: 30 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</SCRIP" +| <body> + +#data +<!doctype html><script></SCRIPT +#errors +Line: 1 Col: 31 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</SCRIPT" +| <body> + +#data +<!doctype html><script></SCRIPT +#errors +Line: 1 Col: 32 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| <body> + +#data +<!doctype html><script></s +#errors +Line: 1 Col: 26 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</s" +| <body> + +#data +<!doctype html><script></sc +#errors +Line: 1 Col: 27 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</sc" +| <body> + +#data +<!doctype html><script></scr +#errors +Line: 1 Col: 28 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</scr" +| <body> + +#data +<!doctype html><script></scri +#errors +Line: 1 Col: 29 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</scri" +| <body> + +#data +<!doctype html><script></scrip +#errors +Line: 1 Col: 30 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</scrip" +| <body> + +#data +<!doctype html><script></script +#errors +Line: 1 Col: 31 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "</script" +| <body> + +#data +<!doctype html><script></script +#errors +Line: 1 Col: 32 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| <body> + +#data +<!doctype html><script><! +#errors +Line: 1 Col: 25 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!" +| <body> + +#data +<!doctype html><script><!a +#errors +Line: 1 Col: 26 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!a" +| <body> + +#data +<!doctype html><script><!- +#errors +Line: 1 Col: 26 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!-" +| <body> + +#data +<!doctype html><script><!-a +#errors +Line: 1 Col: 27 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!-a" +| <body> + +#data +<!doctype html><script><!-- +#errors +Line: 1 Col: 27 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--" +| <body> + +#data +<!doctype html><script><!--a +#errors +Line: 1 Col: 28 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--a" +| <body> + +#data +<!doctype html><script><!--< +#errors +Line: 1 Col: 28 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<" +| <body> + +#data +<!doctype html><script><!--<a +#errors +Line: 1 Col: 29 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<a" +| <body> + +#data +<!doctype html><script><!--</ +#errors +Line: 1 Col: 27 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--</" +| <body> + +#data +<!doctype html><script><!--</script +#errors +Line: 1 Col: 35 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--</script" +| <body> + +#data +<!doctype html><script><!--</script +#errors +Line: 1 Col: 36 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--" +| <body> + +#data +<!doctype html><script><!--<s +#errors +Line: 1 Col: 29 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<s" +| <body> + +#data +<!doctype html><script><!--<script +#errors +Line: 1 Col: 34 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script" +| <body> + +#data +<!doctype html><script><!--<script +#errors +Line: 1 Col: 35 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script " +| <body> + +#data +<!doctype html><script><!--<script < +#errors +Line: 1 Col: 36 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script <" +| <body> + +#data +<!doctype html><script><!--<script <a +#errors +Line: 1 Col: 37 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script <a" +| <body> + +#data +<!doctype html><script><!--<script </ +#errors +Line: 1 Col: 37 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </" +| <body> + +#data +<!doctype html><script><!--<script </s +#errors +Line: 1 Col: 38 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </s" +| <body> + +#data +<!doctype html><script><!--<script </script +#errors +Line: 1 Col: 43 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script" +| <body> + +#data +<!doctype html><script><!--<script </scripta +#errors +Line: 1 Col: 44 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </scripta" +| <body> + +#data +<!doctype html><script><!--<script </script +#errors +Line: 1 Col: 44 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script " +| <body> + +#data +<!doctype html><script><!--<script </script> +#errors +Line: 1 Col: 44 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script>" +| <body> + +#data +<!doctype html><script><!--<script </script/ +#errors +Line: 1 Col: 44 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script/" +| <body> + +#data +<!doctype html><script><!--<script </script < +#errors +Line: 1 Col: 45 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script <" +| <body> + +#data +<!doctype html><script><!--<script </script <a +#errors +Line: 1 Col: 46 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script <a" +| <body> + +#data +<!doctype html><script><!--<script </script </ +#errors +Line: 1 Col: 46 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script </" +| <body> + +#data +<!doctype html><script><!--<script </script </script +#errors +Line: 1 Col: 52 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script </script" +| <body> + +#data +<!doctype html><script><!--<script </script </script +#errors +Line: 1 Col: 53 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script " +| <body> + +#data +<!doctype html><script><!--<script </script </script/ +#errors +Line: 1 Col: 53 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script " +| <body> + +#data +<!doctype html><script><!--<script </script </script> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script </script " +| <body> + +#data +<!doctype html><script><!--<script - +#errors +Line: 1 Col: 36 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script -" +| <body> + +#data +<!doctype html><script><!--<script -a +#errors +Line: 1 Col: 37 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script -a" +| <body> + +#data +<!doctype html><script><!--<script -< +#errors +Line: 1 Col: 37 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script -<" +| <body> + +#data +<!doctype html><script><!--<script -- +#errors +Line: 1 Col: 37 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script --" +| <body> + +#data +<!doctype html><script><!--<script --a +#errors +Line: 1 Col: 38 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script --a" +| <body> + +#data +<!doctype html><script><!--<script --< +#errors +Line: 1 Col: 38 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script --<" +| <body> + +#data +<!doctype html><script><!--<script --> +#errors +Line: 1 Col: 38 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script -->" +| <body> + +#data +<!doctype html><script><!--<script -->< +#errors +Line: 1 Col: 39 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script --><" +| <body> + +#data +<!doctype html><script><!--<script --></ +#errors +Line: 1 Col: 40 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script --></" +| <body> + +#data +<!doctype html><script><!--<script --></script +#errors +Line: 1 Col: 46 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script --></script" +| <body> + +#data +<!doctype html><script><!--<script --></script +#errors +Line: 1 Col: 47 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script -->" +| <body> + +#data +<!doctype html><script><!--<script --></script/ +#errors +Line: 1 Col: 47 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script -->" +| <body> + +#data +<!doctype html><script><!--<script --></script> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script -->" +| <body> + +#data +<!doctype html><script><!--<script><\/script>--></script> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script><\/script>-->" +| <body> + +#data +<!doctype html><script><!--<script></scr'+'ipt>--></script> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script></scr'+'ipt>-->" +| <body> + +#data +<!doctype html><script><!--<script></script><script></script></script> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>" +| <body> + +#data +<!doctype html><script><!--<script></script><script></script>--><!--</script> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>--><!--" +| <body> + +#data +<!doctype html><script><!--<script></script><script></script>-- ></script> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>-- >" +| <body> + +#data +<!doctype html><script><!--<script></script><script></script>- -></script> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>- ->" +| <body> + +#data +<!doctype html><script><!--<script></script><script></script>- - ></script> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>- - >" +| <body> + +#data +<!doctype html><script><!--<script></script><script></script>-></script> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>->" +| <body> + +#data +<!doctype html><script><!--<script>--!></script>X +#errors +Line: 1 Col: 49 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script>--!></script>X" +| <body> + +#data +<!doctype html><script><!--<scr'+'ipt></script>--></script> +#errors +Line: 1 Col: 59 Unexpected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<scr'+'ipt>" +| <body> +| "-->" + +#data +<!doctype html><script><!--<script></scr'+'ipt></script>X +#errors +Line: 1 Col: 57 Unexpected end of file. Expected end tag (script). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| "<!--<script></scr'+'ipt></script>X" +| <body> + +#data +<!doctype html><style><!--<style></style>--></style> +#errors +Line: 1 Col: 52 Unexpected end tag (style). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <style> +| "<!--<style>" +| <body> +| "-->" + +#data +<!doctype html><style><!--</style>X +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <style> +| "<!--" +| <body> +| "X" + +#data +<!doctype html><style><!--...</style>...--></style> +#errors +Line: 1 Col: 51 Unexpected end tag (style). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <style> +| "<!--..." +| <body> +| "...-->" + +#data +<!doctype html><style><!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <style> +| "<!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>" +| <body> +| "X" + +#data +<!doctype html><style><!--...<style><!--...--!></style>--></style> +#errors +Line: 1 Col: 66 Unexpected end tag (style). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <style> +| "<!--...<style><!--...--!>" +| <body> +| "-->" + +#data +<!doctype html><style><!--...</style><!-- --><style>@import ...</style> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <style> +| "<!--..." +| <!-- --> +| <style> +| "@import ..." +| <body> + +#data +<!doctype html><style>...<style><!--...</style><!-- --></style> +#errors +Line: 1 Col: 63 Unexpected end tag (style). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <style> +| "...<style><!--..." +| <!-- --> +| <body> + +#data +<!doctype html><style>...<!--[if IE]><style>...</style>X +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <style> +| "...<!--[if IE]><style>..." +| <body> +| "X" + +#data +<!doctype html><title><!--<title>--> +#errors +Line: 1 Col: 52 Unexpected end tag (title). +#document +| +| +| +| +| "<!--<title>" +| <body> +| "-->" + +#data +<!doctype html><title></title> +#errors +#document +| +| +| +| +| "" +| + +#data +foo/title><link></head><body>X +#errors +Line: 1 Col: 52 Unexpected end of file. Expected end tag (title). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <title> +| "foo/title><link></head><body>X" +| <body> + +#data +<!doctype html><noscript><!--<noscript></noscript>--></noscript> +#errors +Line: 1 Col: 64 Unexpected end tag (noscript). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <noscript> +| "<!--<noscript>" +| <body> +| "-->" + +#data +<!doctype html><noscript><!--</noscript>X<noscript>--></noscript> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <noscript> +| "<!--" +| <body> +| "X" +| <noscript> +| "-->" + +#data +<!doctype html><noscript><iframe></noscript>X +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <noscript> +| "<iframe>" +| <body> +| "X" + +#data +<!doctype html><noframes><!--<noframes></noframes>--></noframes> +#errors +Line: 1 Col: 64 Unexpected end tag (noframes). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <noframes> +| "<!--<noframes>" +| <body> +| "-->" + +#data +<!doctype html><noframes><body><script><!--...</script></body></noframes></html> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <noframes> +| "<body><script><!--...</script></body>" +| <body> + +#data +<!doctype html><textarea><!--<textarea></textarea>--></textarea> +#errors +Line: 1 Col: 64 Unexpected end tag (textarea). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <textarea> +| "<!--<textarea>" +| "-->" + +#data +<!doctype html><textarea></textarea></textarea> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <textarea> +| "</textarea>" + +#data +<!doctype html><textarea><</textarea> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <textarea> +| "<" + +#data +<!doctype html><textarea>a<b</textarea> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <textarea> +| "a<b" + +#data +<!doctype html><iframe><!--<iframe></iframe>--></iframe> +#errors +Line: 1 Col: 56 Unexpected end tag (iframe). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <iframe> +| "<!--<iframe>" +| "-->" + +#data +<!doctype html><iframe>...<!--X->...<!--/X->...</iframe> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <iframe> +| "...<!--X->...<!--/X->..." + +#data +<!doctype html><xmp><!--<xmp></xmp>--></xmp> +#errors +Line: 1 Col: 44 Unexpected end tag (xmp). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <xmp> +| "<!--<xmp>" +| "-->" + +#data +<!doctype html><noembed><!--<noembed></noembed>--></noembed> +#errors +Line: 1 Col: 60 Unexpected end tag (noembed). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <noembed> +| "<!--<noembed>" +| "-->" + +#data +<script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 8 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| <body> + +#data +<script>a +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 9 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "a" +| <body> + +#data +<script>< +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 9 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<" +| <body> + +#data +<script></ +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 10 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</" +| <body> + +#data +<script></S +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 11 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</S" +| <body> + +#data +<script></SC +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 12 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</SC" +| <body> + +#data +<script></SCR +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 13 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</SCR" +| <body> + +#data +<script></SCRI +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 14 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</SCRI" +| <body> + +#data +<script></SCRIP +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 15 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</SCRIP" +| <body> + +#data +<script></SCRIPT +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 16 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</SCRIPT" +| <body> + +#data +<script></SCRIPT +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 17 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| <body> + +#data +<script></s +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 11 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</s" +| <body> + +#data +<script></sc +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 12 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</sc" +| <body> + +#data +<script></scr +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 13 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</scr" +| <body> + +#data +<script></scri +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 14 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</scri" +| <body> + +#data +<script></scrip +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 15 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</scrip" +| <body> + +#data +<script></script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 16 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</script" +| <body> + +#data +<script></script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 17 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| <body> + +#data +<script><! +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 10 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!" +| <body> + +#data +<script><!a +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 11 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!a" +| <body> + +#data +<script><!- +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 11 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!-" +| <body> + +#data +<script><!-a +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 12 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!-a" +| <body> + +#data +<script><!-- +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 12 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--" +| <body> + +#data +<script><!--a +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 13 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--a" +| <body> + +#data +<script><!--< +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 13 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<" +| <body> + +#data +<script><!--<a +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 14 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<a" +| <body> + +#data +<script><!--</ +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 14 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--</" +| <body> + +#data +<script><!--</script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 20 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--</script" +| <body> + +#data +<script><!--</script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 21 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--" +| <body> + +#data +<script><!--<s +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 14 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<s" +| <body> + +#data +<script><!--<script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 19 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script" +| <body> + +#data +<script><!--<script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 20 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script " +| <body> + +#data +<script><!--<script < +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 21 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script <" +| <body> + +#data +<script><!--<script <a +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 22 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script <a" +| <body> + +#data +<script><!--<script </ +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 22 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </" +| <body> + +#data +<script><!--<script </s +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 23 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </s" +| <body> + +#data +<script><!--<script </script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 28 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </script" +| <body> + +#data +<script><!--<script </scripta +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 29 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </scripta" +| <body> + +#data +<script><!--<script </script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 29 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </script " +| <body> + +#data +<script><!--<script </script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 29 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </script>" +| <body> + +#data +<script><!--<script </script/ +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 29 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </script/" +| <body> + +#data +<script><!--<script </script < +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 30 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </script <" +| <body> + +#data +<script><!--<script </script <a +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 31 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </script <a" +| <body> + +#data +<script><!--<script </script </ +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 31 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </script </" +| <body> + +#data +<script><!--<script </script </script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 38 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </script </script" +| <body> + +#data +<script><!--<script </script </script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 38 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </script " +| <body> + +#data +<script><!--<script </script </script/ +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 38 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script </script " +| <body> + +#data +<script><!--<script </script </script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +#document +| <html> +| <head> +| <script> +| "<!--<script </script " +| <body> + +#data +<script><!--<script - +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 21 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script -" +| <body> + +#data +<script><!--<script -a +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 22 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script -a" +| <body> + +#data +<script><!--<script -- +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 22 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script --" +| <body> + +#data +<script><!--<script --a +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 23 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script --a" +| <body> + +#data +<script><!--<script --> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 23 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script -->" +| <body> + +#data +<script><!--<script -->< +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 24 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script --><" +| <body> + +#data +<script><!--<script --></ +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 25 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script --></" +| <body> + +#data +<script><!--<script --></script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 31 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script --></script" +| <body> + +#data +<script><!--<script --></script +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 32 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script -->" +| <body> + +#data +<script><!--<script --></script/ +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 32 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script -->" +| <body> + +#data +<script><!--<script --></script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +#document +| <html> +| <head> +| <script> +| "<!--<script -->" +| <body> + +#data +<script><!--<script><\/script>--></script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +#document +| <html> +| <head> +| <script> +| "<!--<script><\/script>-->" +| <body> + +#data +<script><!--<script></scr'+'ipt>--></script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +#document +| <html> +| <head> +| <script> +| "<!--<script></scr'+'ipt>-->" +| <body> + +#data +<script><!--<script></script><script></script></script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +#document +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>" +| <body> + +#data +<script><!--<script></script><script></script>--><!--</script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +#document +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>--><!--" +| <body> + +#data +<script><!--<script></script><script></script>-- ></script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +#document +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>-- >" +| <body> + +#data +<script><!--<script></script><script></script>- -></script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +#document +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>- ->" +| <body> + +#data +<script><!--<script></script><script></script>- - ></script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +#document +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>- - >" +| <body> + +#data +<script><!--<script></script><script></script>-></script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +#document +| <html> +| <head> +| <script> +| "<!--<script></script><script></script>->" +| <body> + +#data +<script><!--<script>--!></script>X +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 34 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script>--!></script>X" +| <body> + +#data +<script><!--<scr'+'ipt></script>--></script> +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 44 Unexpected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<scr'+'ipt>" +| <body> +| "-->" + +#data +<script><!--<script></scr'+'ipt></script>X +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 42 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "<!--<script></scr'+'ipt></script>X" +| <body> + +#data +<style><!--<style></style>--></style> +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +Line: 1 Col: 37 Unexpected end tag (style). +#document +| <html> +| <head> +| <style> +| "<!--<style>" +| <body> +| "-->" + +#data +<style><!--</style>X +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +#document +| <html> +| <head> +| <style> +| "<!--" +| <body> +| "X" + +#data +<style><!--...</style>...--></style> +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +Line: 1 Col: 36 Unexpected end tag (style). +#document +| <html> +| <head> +| <style> +| "<!--..." +| <body> +| "...-->" + +#data +<style><!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +#document +| <html> +| <head> +| <style> +| "<!--<br><html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>" +| <body> +| "X" + +#data +<style><!--...<style><!--...--!></style>--></style> +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +Line: 1 Col: 51 Unexpected end tag (style). +#document +| <html> +| <head> +| <style> +| "<!--...<style><!--...--!>" +| <body> +| "-->" + +#data +<style><!--...</style><!-- --><style>@import ...</style> +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +#document +| <html> +| <head> +| <style> +| "<!--..." +| <!-- --> +| <style> +| "@import ..." +| <body> + +#data +<style>...<style><!--...</style><!-- --></style> +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +Line: 1 Col: 48 Unexpected end tag (style). +#document +| <html> +| <head> +| <style> +| "...<style><!--..." +| <!-- --> +| <body> + +#data +<style>...<!--[if IE]><style>...</style>X +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +#document +| <html> +| <head> +| <style> +| "...<!--[if IE]><style>..." +| <body> +| "X" + +#data +<title><!--<title>--> +#errors +Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. +Line: 1 Col: 37 Unexpected end tag (title). +#document +| +| +| +| "<!--<title>" +| <body> +| "-->" + +#data +<title></title> +#errors +Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. +#document +| +| +| +| "" +| + +#data +foo/title><link></head><body>X +#errors +Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE. +Line: 1 Col: 37 Unexpected end of file. Expected end tag (title). +#document +| <html> +| <head> +| <title> +| "foo/title><link></head><body>X" +| <body> + +#data +<noscript><!--<noscript></noscript>--></noscript> +#errors +Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE. +Line: 1 Col: 49 Unexpected end tag (noscript). +#document +| <html> +| <head> +| <noscript> +| "<!--<noscript>" +| <body> +| "-->" + +#data +<noscript><!--</noscript>X<noscript>--></noscript> +#errors +Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE. +#document +| <html> +| <head> +| <noscript> +| "<!--" +| <body> +| "X" +| <noscript> +| "-->" + +#data +<noscript><iframe></noscript>X +#errors +Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE. +#document +| <html> +| <head> +| <noscript> +| "<iframe>" +| <body> +| "X" + +#data +<noframes><!--<noframes></noframes>--></noframes> +#errors +Line: 1 Col: 10 Unexpected start tag (noframes). Expected DOCTYPE. +Line: 1 Col: 49 Unexpected end tag (noframes). +#document +| <html> +| <head> +| <noframes> +| "<!--<noframes>" +| <body> +| "-->" + +#data +<noframes><body><script><!--...</script></body></noframes></html> +#errors +Line: 1 Col: 10 Unexpected start tag (noframes). Expected DOCTYPE. +#document +| <html> +| <head> +| <noframes> +| "<body><script><!--...</script></body>" +| <body> + +#data +<textarea><!--<textarea></textarea>--></textarea> +#errors +Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE. +Line: 1 Col: 49 Unexpected end tag (textarea). +#document +| <html> +| <head> +| <body> +| <textarea> +| "<!--<textarea>" +| "-->" + +#data +<textarea></textarea></textarea> +#errors +Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| <textarea> +| "</textarea>" + +#data +<iframe><!--<iframe></iframe>--></iframe> +#errors +Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE. +Line: 1 Col: 41 Unexpected end tag (iframe). +#document +| <html> +| <head> +| <body> +| <iframe> +| "<!--<iframe>" +| "-->" + +#data +<iframe>...<!--X->...<!--/X->...</iframe> +#errors +Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| <iframe> +| "...<!--X->...<!--/X->..." + +#data +<xmp><!--<xmp></xmp>--></xmp> +#errors +Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE. +Line: 1 Col: 29 Unexpected end tag (xmp). +#document +| <html> +| <head> +| <body> +| <xmp> +| "<!--<xmp>" +| "-->" + +#data +<noembed><!--<noembed></noembed>--></noembed> +#errors +Line: 1 Col: 9 Unexpected start tag (noembed). Expected DOCTYPE. +Line: 1 Col: 45 Unexpected end tag (noembed). +#document +| <html> +| <head> +| <body> +| <noembed> +| "<!--<noembed>" +| "-->" + +#data +<!doctype html><table> + +#errors +Line 2 Col 0 Unexpected end of file. Expected table content. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| " +" + +#data +<!doctype html><table><td><span><font></span><span> +#errors +Line 1 Col 26 Unexpected table cell start tag (td) in the table body phase. +Line 1 Col 45 Unexpected end tag (span). +Line 1 Col 51 Expected closing tag. Unexpected end of file. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <span> +| <font> +| <font> +| <span> + +#data +<!doctype html><form><table></form><form></table></form> +#errors +35: Stray end tag “form”. +41: Start tag “form” seen in “table”. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> +| <table> +| <form> diff --git a/html5lib/tests/testdata/tree-construction/tests17.dat b/html5lib/tests/testdata/tree-construction/tests17.dat new file mode 100644 index 00000000..7b555f88 --- /dev/null +++ b/html5lib/tests/testdata/tree-construction/tests17.dat @@ -0,0 +1,153 @@ +#data +<!doctype html><table><tbody><select><tr> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><table><tr><select><td> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <table> +| <tbody> +| <tr> +| <td> + +#data +<!doctype html><table><tr><td><select><td> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <select> +| <td> + +#data +<!doctype html><table><tr><th><select><td> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <th> +| <select> +| <td> + +#data +<!doctype html><table><caption><select><tr> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <caption> +| <select> +| <tbody> +| <tr> + +#data +<!doctype html><select><tr> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><td> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><th> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><tbody> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><thead> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><tfoot> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><select><caption> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><table><tr></table>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| "a" diff --git a/html5lib/tests/testdata/tree-construction/tests18.dat b/html5lib/tests/testdata/tree-construction/tests18.dat new file mode 100644 index 00000000..680e1f06 --- /dev/null +++ b/html5lib/tests/testdata/tree-construction/tests18.dat @@ -0,0 +1,269 @@ +#data +<!doctype html><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" + +#data +<!doctype html><table><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" +| <table> + +#data +<!doctype html><table><tbody><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" +| <table> +| <tbody> + +#data +<!doctype html><table><tbody><tr><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><table><tbody><tr><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><table><td><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <plaintext> +| "</plaintext>" + +#data +<!doctype html><table><caption><plaintext></plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <caption> +| <plaintext> +| "</plaintext>" + +#data +<!doctype html><table><tr><style></script></style>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "abc" +| <table> +| <tbody> +| <tr> +| <style> +| "</script>" + +#data +<!doctype html><table><tr><script></style></script>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "abc" +| <table> +| <tbody> +| <tr> +| <script> +| "</style>" + +#data +<!doctype html><table><caption><style></script></style>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <caption> +| <style> +| "</script>" +| "abc" + +#data +<!doctype html><table><td><style></script></style>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <style> +| "</script>" +| "abc" + +#data +<!doctype html><select><script></style></script>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <script> +| "</style>" +| "abc" + +#data +<!doctype html><table><select><script></style></script>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <script> +| "</style>" +| "abc" +| <table> + +#data +<!doctype html><table><tr><select><script></style></script>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <script> +| "</style>" +| "abc" +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><frameset></frameset><noframes>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <noframes> +| "abc" + +#data +<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <noframes> +| "abc" +| <!-- abc --> + +#data +<!doctype html><frameset></frameset></html><noframes>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <noframes> +| "abc" + +#data +<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <noframes> +| "abc" +| <!-- abc --> + +#data +<!doctype html><table><tr></tbody><tfoot> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <tfoot> + +#data +<!doctype html><table><td><svg></svg>abc<td> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <svg svg> +| "abc" +| <td> diff --git a/html5lib/tests/testdata/tree-construction/tests19.dat b/html5lib/tests/testdata/tree-construction/tests19.dat new file mode 100644 index 00000000..0d62f5a5 --- /dev/null +++ b/html5lib/tests/testdata/tree-construction/tests19.dat @@ -0,0 +1,1237 @@ +#data +<!doctype html><math><mn DefinitionUrl="foo"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <math math> +| <math mn> +| definitionURL="foo" + +#data +<!doctype html><html></p><!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <!-- foo --> +| <head> +| <body> + +#data +<!doctype html><head></head></p><!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <!-- foo --> +| <body> + +#data +<!doctype html><body><p><pre> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <pre> + +#data +<!doctype html><body><p><listing> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <listing> + +#data +<!doctype html><p><plaintext> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <plaintext> + +#data +<!doctype html><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <h1> + +#data +<!doctype html><form><isindex> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> + +#data +<!doctype html><isindex action="POST"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> +| action="POST" +| <hr> +| <label> +| "This is a searchable index. Enter search keywords: " +| <input> +| name="isindex" +| <hr> + +#data +<!doctype html><isindex prompt="this is isindex"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> +| <hr> +| <label> +| "this is isindex" +| <input> +| name="isindex" +| <hr> + +#data +<!doctype html><isindex type="hidden"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> +| <hr> +| <label> +| "This is a searchable index. Enter search keywords: " +| <input> +| name="isindex" +| type="hidden" +| <hr> + +#data +<!doctype html><isindex name="foo"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <form> +| <hr> +| <label> +| "This is a searchable index. Enter search keywords: " +| <input> +| name="isindex" +| <hr> + +#data +<!doctype html><ruby><p><rp> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <p> +| <rp> + +#data +<!doctype html><ruby><div><span><rp> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <div> +| <span> +| <rp> + +#data +<!doctype html><ruby><div><p><rp> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <div> +| <p> +| <rp> + +#data +<!doctype html><ruby><p><rt> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <p> +| <rt> + +#data +<!doctype html><ruby><div><span><rt> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <div> +| <span> +| <rt> + +#data +<!doctype html><ruby><div><p><rt> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <ruby> +| <div> +| <p> +| <rt> + +#data +<!doctype html><math/><foo> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <math math> +| <foo> + +#data +<!doctype html><svg/><foo> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <svg svg> +| <foo> + +#data +<!doctype html><div></body><!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <div> +| <!-- foo --> + +#data +<!doctype html><h1><div><h3><span></h1>foo +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <h1> +| <div> +| <h3> +| <span> +| "foo" + +#data +<!doctype html><p></h3>foo +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| "foo" + +#data +<!doctype html><h3><li>abc</h2>foo +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <h3> +| <li> +| "abc" +| "foo" + +#data +<!doctype html><table>abc<!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "abc" +| <table> +| <!-- foo --> + +#data +<!doctype html><table> <!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| " " +| <!-- foo --> + +#data +<!doctype html><table> b <!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| " b " +| <table> +| <!-- foo --> + +#data +<!doctype html><select><option><option> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <option> +| <option> + +#data +<!doctype html><select><option></optgroup> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <option> + +#data +<!doctype html><select><option></optgroup> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <option> + +#data +<!doctype html><p><math><mi><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math mi> +| <p> +| <h1> + +#data +<!doctype html><p><math><mo><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math mo> +| <p> +| <h1> + +#data +<!doctype html><p><math><mn><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math mn> +| <p> +| <h1> + +#data +<!doctype html><p><math><ms><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math ms> +| <p> +| <h1> + +#data +<!doctype html><p><math><mtext><p><h1> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math mtext> +| <p> +| <h1> + +#data +<!doctype html><frameset></noframes> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!doctype html><html c=d><body></html><html a=b> +#errors +#document +| <!DOCTYPE html> +| <html> +| a="b" +| c="d" +| <head> +| <body> + +#data +<!doctype html><html c=d><frameset></frameset></html><html a=b> +#errors +#document +| <!DOCTYPE html> +| <html> +| a="b" +| c="d" +| <head> +| <frameset> + +#data +<!doctype html><html><frameset></frameset></html><!--foo--> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <!-- foo --> + +#data +<!doctype html><html><frameset></frameset></html> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| " " + +#data +<!doctype html><html><frameset></frameset></html>abc +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!doctype html><html><frameset></frameset></html><p> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!doctype html><html><frameset></frameset></html></p> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<html><frameset></frameset></html><!doctype html> +#errors +#document +| <html> +| <head> +| <frameset> + +#data +<!doctype html><body><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> + +#data +<!doctype html><p><frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<!doctype html><p>a<frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| "a" + +#data +<!doctype html><p> <frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<!doctype html><pre><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <pre> + +#data +<!doctype html><listing><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <listing> + +#data +<!doctype html><li><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <li> + +#data +<!doctype html><dd><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <dd> + +#data +<!doctype html><dt><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <dt> + +#data +<!doctype html><button><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <button> + +#data +<!doctype html><applet><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <applet> + +#data +<!doctype html><marquee><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <marquee> + +#data +<!doctype html><object><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <object> + +#data +<!doctype html><table><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> + +#data +<!doctype html><area><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <area> + +#data +<!doctype html><basefont><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <basefont> +| <frameset> + +#data +<!doctype html><bgsound><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <bgsound> +| <frameset> + +#data +<!doctype html><br><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <br> + +#data +<!doctype html><embed><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <embed> + +#data +<!doctype html><img><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <img> + +#data +<!doctype html><input><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <input> + +#data +<!doctype html><keygen><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <keygen> + +#data +<!doctype html><wbr><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <wbr> + +#data +<!doctype html><hr><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <hr> + +#data +<!doctype html><textarea></textarea><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <textarea> + +#data +<!doctype html><xmp></xmp><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <xmp> + +#data +<!doctype html><iframe></iframe><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <iframe> + +#data +<!doctype html><select></select><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> + +#data +<!doctype html><svg></svg><frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<!doctype html><math></math><frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<!doctype html><svg><foreignObject><div> <frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<!doctype html><svg>a</svg><frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <svg svg> +| "a" + +#data +<!doctype html><svg> </svg><frameset><frame> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> +| <frame> + +#data +<html>aaa<frameset></frameset> +#errors +#document +| <html> +| <head> +| <body> +| "aaa" + +#data +<html> a <frameset></frameset> +#errors +#document +| <html> +| <head> +| <body> +| "a " + +#data +<!doctype html><div><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!doctype html><div><body><frameset> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <div> + +#data +<!doctype html><p><math></p>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| "a" + +#data +<!doctype html><p><math><mn><span></p>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <math math> +| <math mn> +| <span> +| <p> +| "a" + +#data +<!doctype html><math></html> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <math math> + +#data +<!doctype html><meta charset="ascii"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <meta> +| charset="ascii" +| <body> + +#data +<!doctype html><meta http-equiv="content-type" content="text/html;charset=ascii"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <meta> +| content="text/html;charset=ascii" +| http-equiv="content-type" +| <body> + +#data +<!doctype html><head><!--aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa--><meta charset="utf8"> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <!-- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa --> +| <meta> +| charset="utf8" +| <body> + +#data +<!doctype html><html a=b><head></head><html c=d> +#errors +#document +| <!DOCTYPE html> +| <html> +| a="b" +| c="d" +| <head> +| <body> + +#data +<!doctype html><image/> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <img> + +#data +<!doctype html>a<i>b<table>c<b>d</i>e</b>f +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "a" +| <i> +| "bc" +| <b> +| "de" +| "f" +| <table> + +#data +<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <i> +| "a" +| <b> +| "b" +| <b> +| <div> +| <b> +| <i> +| "c" +| <a> +| "d" +| <a> +| "e" +| <a> +| "f" +| <table> + +#data +<!doctype html><i>a<b>b<div>c<a>d</i>e</b>f +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <i> +| "a" +| <b> +| "b" +| <b> +| <div> +| <b> +| <i> +| "c" +| <a> +| "d" +| <a> +| "e" +| <a> +| "f" + +#data +<!doctype html><table><i>a<b>b<div>c</i> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <i> +| "a" +| <b> +| "b" +| <b> +| <div> +| <i> +| "c" +| <table> + +#data +<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <i> +| "a" +| <b> +| "b" +| <b> +| <div> +| <b> +| <i> +| "c" +| <a> +| "d" +| <a> +| "e" +| <a> +| "f" +| <table> + +#data +<!doctype html><table><i>a<div>b<tr>c<b>d</i>e +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <i> +| "a" +| <div> +| "b" +| <i> +| "c" +| <b> +| "d" +| <b> +| "e" +| <table> +| <tbody> +| <tr> + +#data +<!doctype html><table><td><table><i>a<div>b<b>c</i>d +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| <i> +| "a" +| <div> +| <i> +| "b" +| <b> +| "c" +| <b> +| "d" +| <table> + +#data +<!doctype html><body><bgsound> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <bgsound> + +#data +<!doctype html><body><basefont> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <basefont> + +#data +<!doctype html><a><b></a><basefont> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <a> +| <b> +| <basefont> + +#data +<!doctype html><a><b></a><bgsound> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <a> +| <b> +| <bgsound> + +#data +<!doctype html><figcaption><article></figcaption>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <figcaption> +| <article> +| "a" + +#data +<!doctype html><summary><article></summary>a +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <summary> +| <article> +| "a" + +#data +<!doctype html><p><a><plaintext>b +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <a> +| <plaintext> +| <a> +| "b" + +#data +<!DOCTYPE html><div>a<a></div>b<p>c</p>d +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <div> +| "a" +| <a> +| <a> +| "b" +| <p> +| "c" +| "d" diff --git a/html5lib/tests/testdata/tree-construction/tests2.dat b/html5lib/tests/testdata/tree-construction/tests2.dat new file mode 100644 index 00000000..60d85922 --- /dev/null +++ b/html5lib/tests/testdata/tree-construction/tests2.dat @@ -0,0 +1,763 @@ +#data +<!DOCTYPE html>Test +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "Test" + +#data +<textarea>test</div>test +#errors +Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE. +Line: 1 Col: 24 Expected closing tag. Unexpected end of file. +#document +| <html> +| <head> +| <body> +| <textarea> +| "test</div>test" + +#data +<table><td> +#errors +Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE. +Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase. +Line: 1 Col: 11 Expected closing tag. Unexpected end of file. +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> + +#data +<table><td>test</tbody></table> +#errors +Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE. +Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase. +#document +| <html> +| <head> +| <body> +| <table> +| <tbody> +| <tr> +| <td> +| "test" + +#data +<frame>test +#errors +Line: 1 Col: 7 Unexpected start tag (frame). Expected DOCTYPE. +Line: 1 Col: 7 Unexpected start tag frame. Ignored. +#document +| <html> +| <head> +| <body> +| "test" + +#data +<!DOCTYPE html><frameset>test +#errors +Line: 1 Col: 29 Unepxected characters in the frameset phase. Characters ignored. +Line: 1 Col: 29 Expected closing tag. Unexpected end of file. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!DOCTYPE html><frameset><!DOCTYPE html> +#errors +Line: 1 Col: 40 Unexpected DOCTYPE. Ignored. +Line: 1 Col: 40 Expected closing tag. Unexpected end of file. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <frameset> + +#data +<!DOCTYPE html><font><p><b>test</font> +#errors +Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm. +Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <font> +| <p> +| <font> +| <b> +| "test" + +#data +<!DOCTYPE html><dt><div><dd> +#errors +Line: 1 Col: 28 Missing end tag (div, dt). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <dt> +| <div> +| <dd> + +#data +<script></x +#errors +Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE. +Line: 1 Col: 11 Unexpected end of file. Expected end tag (script). +#document +| <html> +| <head> +| <script> +| "</x" +| <body> + +#data +<table><plaintext><td> +#errors +Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE. +Line: 1 Col: 18 Unexpected start tag (plaintext) in table context caused voodoo mode. +Line: 1 Col: 22 Unexpected end of file. Expected table content. +#document +| <html> +| <head> +| <body> +| <plaintext> +| "<td>" +| <table> + +#data +<plaintext></plaintext> +#errors +Line: 1 Col: 11 Unexpected start tag (plaintext). Expected DOCTYPE. +Line: 1 Col: 23 Expected closing tag. Unexpected end of file. +#document +| <html> +| <head> +| <body> +| <plaintext> +| "</plaintext>" + +#data +<!DOCTYPE html><table><tr>TEST +#errors +Line: 1 Col: 30 Unexpected non-space characters in table context caused voodoo mode. +Line: 1 Col: 30 Unexpected end of file. Expected table content. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "TEST" +| <table> +| <tbody> +| <tr> + +#data +<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4> +#errors +Line: 1 Col: 37 Unexpected start tag (body). +Line: 1 Col: 53 Unexpected start tag (body). +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| t1="1" +| t2="2" +| t3="3" +| t4="4" + +#data +</b test +#errors +Line: 1 Col: 8 Unexpected end of file in attribute name. +Line: 1 Col: 8 End tag contains unexpected attributes. +Line: 1 Col: 8 Unexpected end tag (b). Expected DOCTYPE. +Line: 1 Col: 8 Unexpected end tag (b) after the (implied) root element. +#document +| <html> +| <head> +| <body> + +#data +<!DOCTYPE html></b test<b &=&>X +#errors +Line: 1 Col: 32 Named entity didn't end with ';'. +Line: 1 Col: 33 End tag contains unexpected attributes. +Line: 1 Col: 33 Unexpected end tag (b) after the (implied) root element. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "X" + +#data +<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt +#errors +Line: 1 Col: 9 No space after literal string 'DOCTYPE'. +Line: 1 Col: 54 Unexpected end of file in the tag name. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <script> +| type="text/x-foobar;baz" +| "X</SCRipt" +| <body> + +#data +& +#errors +Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| "&" + +#data +&# +#errors +Line: 1 Col: 1 Numeric entity expected. Got end of file instead. +Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| "&#" + +#data +&#X +#errors +Line: 1 Col: 3 Numeric entity expected but none found. +Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| "&#X" + +#data +&#x +#errors +Line: 1 Col: 3 Numeric entity expected but none found. +Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| "&#x" + +#data +- +#errors +Line: 1 Col: 4 Numeric entity didn't end with ';'. +Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| "-" + +#data +&x-test +#errors +Line: 1 Col: 1 Named entity expected. Got none. +Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| "&x-test" + +#data +<!doctypehtml><p><li> +#errors +Line: 1 Col: 9 No space after literal string 'DOCTYPE'. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <li> + +#data +<!doctypehtml><p><dt> +#errors +Line: 1 Col: 9 No space after literal string 'DOCTYPE'. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <dt> + +#data +<!doctypehtml><p><dd> +#errors +Line: 1 Col: 9 No space after literal string 'DOCTYPE'. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <dd> + +#data +<!doctypehtml><p><form> +#errors +Line: 1 Col: 9 No space after literal string 'DOCTYPE'. +Line: 1 Col: 23 Expected closing tag. Unexpected end of file. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| <form> + +#data +<!DOCTYPE html><p></P>X +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <p> +| "X" + +#data +& +#errors +Line: 1 Col: 4 Named entity didn't end with ';'. +Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| "&" + +#data +&AMp; +#errors +Line: 1 Col: 1 Named entity expected. Got none. +Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| "&AMp;" + +#data +<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY> +#errors +Line: 1 Col: 110 Expected closing tag. Unexpected end of file. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly> + +#data +<!DOCTYPE html>X</body>X +#errors +Line: 1 Col: 24 Unexpected non-space characters in the after body phase. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| "XX" + +#data +<!DOCTYPE html><!-- X +#errors +Line: 1 Col: 21 Unexpected end of file in comment. +#document +| <!DOCTYPE html> +| <!-- X --> +| <html> +| <head> +| <body> + +#data +<!DOCTYPE html><table><caption>test TEST</caption><td>test +#errors +Line: 1 Col: 54 Unexpected table cell start tag (td) in the table body phase. +Line: 1 Col: 58 Expected closing tag. Unexpected end of file. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <table> +| <caption> +| "test TEST" +| <tbody> +| <tr> +| <td> +| "test" + +#data +<!DOCTYPE html><select><option><optgroup> +#errors +Line: 1 Col: 41 Expected closing tag. Unexpected end of file. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <option> +| <optgroup> + +#data +<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option> +#errors +Line: 1 Col: 68 Unexpected select start tag in the select phase treated as select end tag. +Line: 1 Col: 76 Expected closing tag. Unexpected end of file. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <optgroup> +| <option> +| <option> +| <option> + +#data +<!DOCTYPE html><select><optgroup><option><optgroup> +#errors +Line: 1 Col: 51 Expected closing tag. Unexpected end of file. +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <optgroup> +| <option> +| <optgroup> + +#data +<!DOCTYPE html><datalist><option>foo</datalist>bar +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <datalist> +| <option> +| "foo" +| "bar" + +#data +<!DOCTYPE html><font><input><input></font> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <font> +| <input> +| <input> + +#data +<!DOCTYPE html><!-- XXX - XXX --> +#errors +#document +| <!DOCTYPE html> +| <!-- XXX - XXX --> +| <html> +| <head> +| <body> + +#data +<!DOCTYPE html><!-- XXX - XXX +#errors +Line: 1 Col: 29 Unexpected end of file in comment (-) +#document +| <!DOCTYPE html> +| <!-- XXX - XXX --> +| <html> +| <head> +| <body> + +#data +<!DOCTYPE html><!-- XXX - XXX - XXX --> +#errors +#document +| <!DOCTYPE html> +| <!-- XXX - XXX - XXX --> +| <html> +| <head> +| <body> + +#data +<isindex test=x name=x> +#errors +Line: 1 Col: 23 Unexpected start tag (isindex). Expected DOCTYPE. +Line: 1 Col: 23 Unexpected start tag isindex. Don't use it! +#document +| <html> +| <head> +| <body> +| <form> +| <hr> +| <label> +| "This is a searchable index. Enter search keywords: " +| <input> +| name="isindex" +| test="x" +| <hr> + +#data +test +test +#errors +Line: 2 Col: 4 Unexpected non-space characters. Expected DOCTYPE. +#document +| <html> +| <head> +| <body> +| "test +test" + +#data +<!DOCTYPE html><body><title>test</body> +#errors +#document +| +| +| +| +| +| "test</body>" + +#data +<!DOCTYPE html><body><title>X +#errors +#document +| +| +| +| +| +| "X" +| <meta> +| name="z" +| <link> +| rel="foo" +| <style> +| " +x { content:"</style" } " + +#data +<!DOCTYPE html><select><optgroup></optgroup></select> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> +| <select> +| <optgroup> + +#data + + +#errors +Line: 2 Col: 1 Unexpected End of file. Expected DOCTYPE. +#document +| <html> +| <head> +| <body> + +#data +<!DOCTYPE html> <html> +#errors +#document +| <!DOCTYPE html> +| <html> +| <head> +| <body> + +#data +<!DOCTYPE html><script> +</script> <title>x +#errors +#document +| +| +| +| +#errors +Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE. +Line: 1 Col: 21 Unexpected start tag (script) that can be in head. Moved. +#document +| +| +| +#errors +Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE. +Line: 1 Col: 28 Unexpected start tag (style) that can be in head. Moved. +#document +| +| +| +#errors +Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE. +#document +| +| +| +| +| "x" +| x +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +Line: 1 Col: 22 Unexpected end of file. Expected end tag (style). +#document +| +| +| --> x +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +#document +| +| +| x +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +#document +| +| +| x +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +#document +| +| +| x +#errors +Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE. +#document +| +| +|

      +#errors +#document +| +| +| +| +| +| ddd +#errors +#document +| +| +| +#errors +#document +| +| +| +| +|
    • +| +|