diff --git a/bs4/__init__.py b/bs4/__init__.py
new file mode 100644
index 00000000..80f6f684
--- /dev/null
+++ b/bs4/__init__.py
@@ -0,0 +1,359 @@
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup uses a pluggable XML or HTML parser to parse a
+(possibly invalid) document into a tree representation. Beautiful Soup
+provides provides methods and Pythonic idioms that make it easy to
+navigate, search, and modify the parse tree.
+
+Beautiful Soup works with Python 2.6 and up. It works better if lxml
+and/or html5lib is installed.
+
+For more than you ever wanted to know about Beautiful Soup, see the
+documentation:
+http://www.crummy.com/software/BeautifulSoup/bs4/doc/
+"""
+
+__author__ = "Leonard Richardson (leonardr@segfault.org)"
+__version__ = "4.1.3"
+__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson"
+__license__ = "MIT"
+
+__all__ = ['BeautifulSoup']
+
+import re
+import warnings
+
+from .builder import builder_registry
+from .dammit import UnicodeDammit
+from .element import (
+ CData,
+ Comment,
+ DEFAULT_OUTPUT_ENCODING,
+ Declaration,
+ Doctype,
+ NavigableString,
+ PageElement,
+ ProcessingInstruction,
+ ResultSet,
+ SoupStrainer,
+ Tag,
+ )
+
+# The very first thing we do is give a useful error if someone is
+# running this code under Python 3 without converting it.
+syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
+
+class BeautifulSoup(Tag):
+ """
+ This class defines the basic interface called by the tree builders.
+
+ These methods will be called by the parser:
+ reset()
+ feed(markup)
+
+ The tree builder may call these methods from its feed() implementation:
+ handle_starttag(name, attrs) # See note about return value
+ handle_endtag(name)
+ handle_data(data) # Appends to the current data node
+ endData(containerClass=NavigableString) # Ends the current data node
+
+ No matter how complicated the underlying parser is, you should be
+ able to build a tree using 'start tag' events, 'end tag' events,
+ 'data' events, and "done with data" events.
+
+ If you encounter an empty-element tag (aka a self-closing tag,
+ like HTML's tag), call handle_starttag and then
+ handle_endtag.
+ """
+ ROOT_TAG_NAME = u'[document]'
+
+ # If the end-user gives no indication which tree builder they
+ # want, look for one with these features.
+ DEFAULT_BUILDER_FEATURES = ['html', 'fast']
+
+ # Used when determining whether a text node is all whitespace and
+ # can be replaced with a single space. A text node that contains
+ # fancy Unicode spaces (usually non-breaking) should be left
+ # alone.
+ STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, }
+
+ def __init__(self, markup="", features=None, builder=None,
+ parse_only=None, from_encoding=None, **kwargs):
+ """The Soup object is initialized as the 'root tag', and the
+ provided markup (which can be a string or a file-like object)
+ is fed into the underlying parser."""
+
+ if 'convertEntities' in kwargs:
+ warnings.warn(
+ "BS4 does not respect the convertEntities argument to the "
+ "BeautifulSoup constructor. Entities are always converted "
+ "to Unicode characters.")
+
+ if 'markupMassage' in kwargs:
+ del kwargs['markupMassage']
+ warnings.warn(
+ "BS4 does not respect the markupMassage argument to the "
+ "BeautifulSoup constructor. The tree builder is responsible "
+ "for any necessary markup massage.")
+
+ if 'smartQuotesTo' in kwargs:
+ del kwargs['smartQuotesTo']
+ warnings.warn(
+ "BS4 does not respect the smartQuotesTo argument to the "
+ "BeautifulSoup constructor. Smart quotes are always converted "
+ "to Unicode characters.")
+
+ if 'selfClosingTags' in kwargs:
+ del kwargs['selfClosingTags']
+ warnings.warn(
+ "BS4 does not respect the selfClosingTags argument to the "
+ "BeautifulSoup constructor. The tree builder is responsible "
+ "for understanding self-closing tags.")
+
+ if 'isHTML' in kwargs:
+ del kwargs['isHTML']
+ warnings.warn(
+ "BS4 does not respect the isHTML argument to the "
+ "BeautifulSoup constructor. You can pass in features='html' "
+ "or features='xml' to get a builder capable of handling "
+ "one or the other.")
+
+ def deprecated_argument(old_name, new_name):
+ if old_name in kwargs:
+ warnings.warn(
+ 'The "%s" argument to the BeautifulSoup constructor '
+ 'has been renamed to "%s."' % (old_name, new_name))
+ value = kwargs[old_name]
+ del kwargs[old_name]
+ return value
+ return None
+
+ parse_only = parse_only or deprecated_argument(
+ "parseOnlyThese", "parse_only")
+
+ from_encoding = from_encoding or deprecated_argument(
+ "fromEncoding", "from_encoding")
+
+ if len(kwargs) > 0:
+ arg = kwargs.keys().pop()
+ raise TypeError(
+ "__init__() got an unexpected keyword argument '%s'" % arg)
+
+ if builder is None:
+ if isinstance(features, basestring):
+ features = [features]
+ if features is None or len(features) == 0:
+ features = self.DEFAULT_BUILDER_FEATURES
+ builder_class = builder_registry.lookup(*features)
+ if builder_class is None:
+ raise FeatureNotFound(
+ "Couldn't find a tree builder with the features you "
+ "requested: %s. Do you need to install a parser library?"
+ % ",".join(features))
+ builder = builder_class()
+ self.builder = builder
+ self.is_xml = builder.is_xml
+ self.builder.soup = self
+
+ self.parse_only = parse_only
+
+ self.reset()
+
+ if hasattr(markup, 'read'): # It's a file-type object.
+ markup = markup.read()
+ (self.markup, self.original_encoding, self.declared_html_encoding,
+ self.contains_replacement_characters) = (
+ self.builder.prepare_markup(markup, from_encoding))
+
+ try:
+ self._feed()
+ except StopParsing:
+ pass
+
+ # Clear out the markup and remove the builder's circular
+ # reference to this object.
+ self.markup = None
+ self.builder.soup = None
+
+ def _feed(self):
+ # Convert the document to Unicode.
+ self.builder.reset()
+
+ self.builder.feed(self.markup)
+ # Close out any unfinished strings and close all the open tags.
+ self.endData()
+ while self.currentTag.name != self.ROOT_TAG_NAME:
+ self.popTag()
+
+ def reset(self):
+ Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
+ self.hidden = 1
+ self.builder.reset()
+ self.currentData = []
+ self.currentTag = None
+ self.tagStack = []
+ self.pushTag(self)
+
+ def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
+ """Create a new tag associated with this soup."""
+ return Tag(None, self.builder, name, namespace, nsprefix, attrs)
+
+ def new_string(self, s):
+ """Create a new NavigableString associated with this soup."""
+ navigable = NavigableString(s)
+ navigable.setup()
+ return navigable
+
+ def insert_before(self, successor):
+ raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
+
+ def insert_after(self, successor):
+ raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
+
+ def popTag(self):
+ tag = self.tagStack.pop()
+ #print "Pop", tag.name
+ if self.tagStack:
+ self.currentTag = self.tagStack[-1]
+ return self.currentTag
+
+ def pushTag(self, tag):
+ #print "Push", tag.name
+ if self.currentTag:
+ self.currentTag.contents.append(tag)
+ self.tagStack.append(tag)
+ self.currentTag = self.tagStack[-1]
+
+ def endData(self, containerClass=NavigableString):
+ if self.currentData:
+ currentData = u''.join(self.currentData)
+ if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
+ not set([tag.name for tag in self.tagStack]).intersection(
+ self.builder.preserve_whitespace_tags)):
+ if '\n' in currentData:
+ currentData = '\n'
+ else:
+ currentData = ' '
+ self.currentData = []
+ if self.parse_only and len(self.tagStack) <= 1 and \
+ (not self.parse_only.text or \
+ not self.parse_only.search(currentData)):
+ return
+ o = containerClass(currentData)
+ self.object_was_parsed(o)
+
+ def object_was_parsed(self, o):
+ """Add an object to the parse tree."""
+ o.setup(self.currentTag, self.previous_element)
+ if self.previous_element:
+ self.previous_element.next_element = o
+ self.previous_element = o
+ self.currentTag.contents.append(o)
+
+ def _popToTag(self, name, nsprefix=None, inclusivePop=True):
+ """Pops the tag stack up to and including the most recent
+ instance of the given tag. If inclusivePop is false, pops the tag
+ stack up to but *not* including the most recent instqance of
+ the given tag."""
+ #print "Popping to %s" % name
+ if name == self.ROOT_TAG_NAME:
+ return
+
+ numPops = 0
+ mostRecentTag = None
+
+ for i in range(len(self.tagStack) - 1, 0, -1):
+ if (name == self.tagStack[i].name
+ and nsprefix == self.tagStack[i].prefix):
+ numPops = len(self.tagStack) - i
+ break
+ if not inclusivePop:
+ numPops = numPops - 1
+
+ for i in range(0, numPops):
+ mostRecentTag = self.popTag()
+ return mostRecentTag
+
+ def handle_starttag(self, name, namespace, nsprefix, attrs):
+ """Push a start tag on to the stack.
+
+ If this method returns None, the tag was rejected by the
+ SoupStrainer. You should proceed as if the tag had not occured
+ in the document. For instance, if this was a self-closing tag,
+ don't call handle_endtag.
+ """
+
+ # print "Start tag %s: %s" % (name, attrs)
+ self.endData()
+
+ if (self.parse_only and len(self.tagStack) <= 1
+ and (self.parse_only.text
+ or not self.parse_only.search_tag(name, attrs))):
+ return None
+
+ tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
+ self.currentTag, self.previous_element)
+ if tag is None:
+ return tag
+ if self.previous_element:
+ self.previous_element.next_element = tag
+ self.previous_element = tag
+ self.pushTag(tag)
+ return tag
+
+ def handle_endtag(self, name, nsprefix=None):
+ #print "End tag: " + name
+ self.endData()
+ self._popToTag(name, nsprefix)
+
+ def handle_data(self, data):
+ self.currentData.append(data)
+
+ def decode(self, pretty_print=False,
+ eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+ formatter="minimal"):
+ """Returns a string or Unicode representation of this document.
+ To get Unicode, pass None for encoding."""
+
+ if self.is_xml:
+ # Print the XML declaration
+ encoding_part = ''
+ if eventual_encoding != None:
+ encoding_part = ' encoding="%s"' % eventual_encoding
+ prefix = u'\n' % encoding_part
+ else:
+ prefix = u''
+ if not pretty_print:
+ indent_level = None
+ else:
+ indent_level = 0
+ return prefix + super(BeautifulSoup, self).decode(
+ indent_level, eventual_encoding, formatter)
+
+class BeautifulStoneSoup(BeautifulSoup):
+ """Deprecated interface to an XML parser."""
+
+ def __init__(self, *args, **kwargs):
+ kwargs['features'] = 'xml'
+ warnings.warn(
+ 'The BeautifulStoneSoup class is deprecated. Instead of using '
+ 'it, pass features="xml" into the BeautifulSoup constructor.')
+ super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
+
+
+class StopParsing(Exception):
+ pass
+
+
+class FeatureNotFound(ValueError):
+ pass
+
+
+#By default, act as an HTML pretty-printer.
+if __name__ == '__main__':
+ import sys
+ soup = BeautifulSoup(sys.stdin)
+ print soup.prettify()
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
new file mode 100644
index 00000000..dc7deb93
--- /dev/null
+++ b/bs4/builder/__init__.py
@@ -0,0 +1,316 @@
+from collections import defaultdict
+import itertools
+import sys
+from bs4.element import (
+ CharsetMetaAttributeValue,
+ ContentMetaAttributeValue,
+ whitespace_re
+ )
+
+__all__ = [
+ 'HTMLTreeBuilder',
+ 'SAXTreeBuilder',
+ 'TreeBuilder',
+ 'TreeBuilderRegistry',
+ ]
+
+# Some useful features for a TreeBuilder to have.
+FAST = 'fast'
+PERMISSIVE = 'permissive'
+STRICT = 'strict'
+XML = 'xml'
+HTML = 'html'
+HTML_5 = 'html5'
+
+
+class TreeBuilderRegistry(object):
+
+ def __init__(self):
+ self.builders_for_feature = defaultdict(list)
+ self.builders = []
+
+ def register(self, treebuilder_class):
+ """Register a treebuilder based on its advertised features."""
+ for feature in treebuilder_class.features:
+ self.builders_for_feature[feature].insert(0, treebuilder_class)
+ self.builders.insert(0, treebuilder_class)
+
+ def lookup(self, *features):
+ if len(self.builders) == 0:
+ # There are no builders at all.
+ return None
+
+ if len(features) == 0:
+ # They didn't ask for any features. Give them the most
+ # recently registered builder.
+ return self.builders[0]
+
+ # Go down the list of features in order, and eliminate any builders
+ # that don't match every feature.
+ features = list(features)
+ features.reverse()
+ candidates = None
+ candidate_set = None
+ while len(features) > 0:
+ feature = features.pop()
+ we_have_the_feature = self.builders_for_feature.get(feature, [])
+ if len(we_have_the_feature) > 0:
+ if candidates is None:
+ candidates = we_have_the_feature
+ candidate_set = set(candidates)
+ else:
+ # Eliminate any candidates that don't have this feature.
+ candidate_set = candidate_set.intersection(
+ set(we_have_the_feature))
+
+ # The only valid candidates are the ones in candidate_set.
+ # Go through the original list of candidates and pick the first one
+ # that's in candidate_set.
+ if candidate_set is None:
+ return None
+ for candidate in candidates:
+ if candidate in candidate_set:
+ return candidate
+ return None
+
+# The BeautifulSoup class will take feature lists from developers and use them
+# to look up builders in this registry.
+builder_registry = TreeBuilderRegistry()
+
+class TreeBuilder(object):
+ """Turn a document into a Beautiful Soup object tree."""
+
+ features = []
+
+ is_xml = False
+ preserve_whitespace_tags = set()
+ empty_element_tags = None # A tag will be considered an empty-element
+ # tag when and only when it has no contents.
+
+ # A value for these tag/attribute combinations is a space- or
+ # comma-separated list of CDATA, rather than a single CDATA.
+ cdata_list_attributes = {}
+
+
+ def __init__(self):
+ self.soup = None
+
+ def reset(self):
+ pass
+
+ def can_be_empty_element(self, tag_name):
+ """Might a tag with this name be an empty-element tag?
+
+ The final markup may or may not actually present this tag as
+ self-closing.
+
+ For instance: an HTMLBuilder does not consider a
tag to be
+ an empty-element tag (it's not in
+ HTMLBuilder.empty_element_tags). This means an empty
'
+ soup = self.soup(markup)
+ return doctype, soup
+
+ def test_normal_doctypes(self):
+ """Make sure normal, everyday HTML doctypes are handled correctly."""
+ self.assertDoctypeHandled("html")
+ self.assertDoctypeHandled(
+ 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
+
+ def test_public_doctype_with_url(self):
+ doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
+ self.assertDoctypeHandled(doctype)
+
+ def test_system_doctype(self):
+ self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
+
+ def test_namespaced_system_doctype(self):
+ # We can handle a namespaced doctype with a system ID.
+ self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
+
+ def test_namespaced_public_doctype(self):
+ # Test a namespaced doctype with a public id.
+ self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
+
+ def test_real_xhtml_document(self):
+ """A real XHTML document should come out more or less the same as it went in."""
+ markup = b"""
+
+
+Hello.
+Goodbye.
+"""
+ soup = self.soup(markup)
+ self.assertEqual(
+ soup.encode("utf-8").replace(b"\n", b""),
+ markup.replace(b"\n", b""))
+
+ def test_deepcopy(self):
+ """Make sure you can copy the tree builder.
+
+ This is important because the builder is part of a
+ BeautifulSoup object, and we want to be able to copy that.
+ """
+ copy.deepcopy(self.default_builder)
+
+ def test_p_tag_is_never_empty_element(self):
+ """A
tag is never designated as an empty-element tag.
+
+ Even if the markup shows it as an empty-element tag, it
+ shouldn't be presented that way.
+ """
+ soup = self.soup("
")
+ self.assertFalse(soup.p.is_empty_element)
+ self.assertEqual(str(soup.p), "")
+
+ def test_unclosed_tags_get_closed(self):
+ """A tag that's not closed by the end of the document should be closed.
+
+ This applies to all tags except empty-element tags.
+ """
+ self.assertSoupEquals("
", "
")
+ self.assertSoupEquals("", "")
+
+ self.assertSoupEquals(" ", " ")
+
+ def test_br_is_always_empty_element_tag(self):
+ """A tag is designated as an empty-element tag.
+
+ Some parsers treat as one tag, some parsers as
+ two tags, but it should always be an empty-element tag.
+ """
+ soup = self.soup(" ")
+ self.assertTrue(soup.br.is_empty_element)
+ self.assertEqual(str(soup.br), " ")
+
+ def test_nested_formatting_elements(self):
+ self.assertSoupEquals("")
+
+ def test_comment(self):
+ # Comments are represented as Comment objects.
+ markup = "
foobaz
"
+ self.assertSoupEquals(markup)
+
+ soup = self.soup(markup)
+ comment = soup.find(text="foobar")
+ self.assertEqual(comment.__class__, Comment)
+
+ def test_preserved_whitespace_in_pre_and_textarea(self):
+ """Whitespace must be preserved in
and
+#errors
+Line: 1 Col: 9 Unexpected end tag (strong). Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected end tag (strong) after the (implied) root element.
+Line: 1 Col: 13 Unexpected end tag (b) after the (implied) root element.
+Line: 1 Col: 18 Unexpected end tag (em) after the (implied) root element.
+Line: 1 Col: 22 Unexpected end tag (i) after the (implied) root element.
+Line: 1 Col: 26 Unexpected end tag (u) after the (implied) root element.
+Line: 1 Col: 35 Unexpected end tag (strike) after the (implied) root element.
+Line: 1 Col: 39 Unexpected end tag (s) after the (implied) root element.
+Line: 1 Col: 47 Unexpected end tag (blink) after the (implied) root element.
+Line: 1 Col: 52 Unexpected end tag (tt) after the (implied) root element.
+Line: 1 Col: 58 Unexpected end tag (pre) after the (implied) root element.
+Line: 1 Col: 64 Unexpected end tag (big) after the (implied) root element.
+Line: 1 Col: 72 Unexpected end tag (small) after the (implied) root element.
+Line: 1 Col: 79 Unexpected end tag (font) after the (implied) root element.
+Line: 1 Col: 88 Unexpected end tag (select) after the (implied) root element.
+Line: 1 Col: 93 Unexpected end tag (h1) after the (implied) root element.
+Line: 1 Col: 98 Unexpected end tag (h2) after the (implied) root element.
+Line: 1 Col: 103 Unexpected end tag (h3) after the (implied) root element.
+Line: 1 Col: 108 Unexpected end tag (h4) after the (implied) root element.
+Line: 1 Col: 113 Unexpected end tag (h5) after the (implied) root element.
+Line: 1 Col: 118 Unexpected end tag (h6) after the (implied) root element.
+Line: 1 Col: 125 Unexpected end tag (body) after the (implied) root element.
+Line: 1 Col: 130 Unexpected end tag (br). Treated as br element.
+Line: 1 Col: 134 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 140 This element (img) has no end tag.
+Line: 1 Col: 148 Unexpected end tag (title). Ignored.
+Line: 1 Col: 155 Unexpected end tag (span). Ignored.
+Line: 1 Col: 163 Unexpected end tag (style). Ignored.
+Line: 1 Col: 172 Unexpected end tag (script). Ignored.
+Line: 1 Col: 180 Unexpected end tag (table). Ignored.
+Line: 1 Col: 185 Unexpected end tag (th). Ignored.
+Line: 1 Col: 190 Unexpected end tag (td). Ignored.
+Line: 1 Col: 195 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 203 This element (frame) has no end tag.
+Line: 1 Col: 210 This element (area) has no end tag.
+Line: 1 Col: 217 Unexpected end tag (link). Ignored.
+Line: 1 Col: 225 This element (param) has no end tag.
+Line: 1 Col: 230 This element (hr) has no end tag.
+Line: 1 Col: 238 This element (input) has no end tag.
+Line: 1 Col: 244 Unexpected end tag (col). Ignored.
+Line: 1 Col: 251 Unexpected end tag (base). Ignored.
+Line: 1 Col: 258 Unexpected end tag (meta). Ignored.
+Line: 1 Col: 269 This element (basefont) has no end tag.
+Line: 1 Col: 279 This element (bgsound) has no end tag.
+Line: 1 Col: 287 This element (embed) has no end tag.
+Line: 1 Col: 296 This element (spacer) has no end tag.
+Line: 1 Col: 300 Unexpected end tag (p). Ignored.
+Line: 1 Col: 305 End tag (dd) seen too early. Expected other end tag.
+Line: 1 Col: 310 End tag (dt) seen too early. Expected other end tag.
+Line: 1 Col: 320 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 331 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 339 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 347 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 355 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 365 End tag (address) seen too early. Expected other end tag.
+Line: 1 Col: 378 End tag (blockquote) seen too early. Expected other end tag.
+Line: 1 Col: 387 End tag (center) seen too early. Expected other end tag.
+Line: 1 Col: 393 Unexpected end tag (dir). Ignored.
+Line: 1 Col: 399 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 404 End tag (dl) seen too early. Expected other end tag.
+Line: 1 Col: 415 End tag (fieldset) seen too early. Expected other end tag.
+Line: 1 Col: 425 End tag (listing) seen too early. Expected other end tag.
+Line: 1 Col: 432 End tag (menu) seen too early. Expected other end tag.
+Line: 1 Col: 437 End tag (ol) seen too early. Expected other end tag.
+Line: 1 Col: 442 End tag (ul) seen too early. Expected other end tag.
+Line: 1 Col: 447 End tag (li) seen too early. Expected other end tag.
+Line: 1 Col: 454 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 460 This element (wbr) has no end tag.
+Line: 1 Col: 476 End tag (button) seen too early. Expected other end tag.
+Line: 1 Col: 486 End tag (marquee) seen too early. Expected other end tag.
+Line: 1 Col: 495 End tag (object) seen too early. Expected other end tag.
+Line: 1 Col: 513 Unexpected end tag (html). Ignored.
+Line: 1 Col: 513 Unexpected end tag (frameset). Ignored.
+Line: 1 Col: 520 Unexpected end tag (head). Ignored.
+Line: 1 Col: 529 Unexpected end tag (iframe). Ignored.
+Line: 1 Col: 537 This element (image) has no end tag.
+Line: 1 Col: 547 This element (isindex) has no end tag.
+Line: 1 Col: 557 Unexpected end tag (noembed). Ignored.
+Line: 1 Col: 568 Unexpected end tag (noframes). Ignored.
+Line: 1 Col: 579 Unexpected end tag (noscript). Ignored.
+Line: 1 Col: 590 Unexpected end tag (optgroup). Ignored.
+Line: 1 Col: 599 Unexpected end tag (option). Ignored.
+Line: 1 Col: 611 Unexpected end tag (plaintext). Ignored.
+Line: 1 Col: 622 Unexpected end tag (textarea). Ignored.
+#document
+|
+|
+|
+|
+|
+
+#data
+
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (strong) in table context caused voodoo mode.
+Line: 1 Col: 20 End tag (strong) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 24 Unexpected end tag (b) in table context caused voodoo mode.
+Line: 1 Col: 24 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 29 Unexpected end tag (em) in table context caused voodoo mode.
+Line: 1 Col: 29 End tag (em) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 33 Unexpected end tag (i) in table context caused voodoo mode.
+Line: 1 Col: 33 End tag (i) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 37 Unexpected end tag (u) in table context caused voodoo mode.
+Line: 1 Col: 37 End tag (u) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 46 Unexpected end tag (strike) in table context caused voodoo mode.
+Line: 1 Col: 46 End tag (strike) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 50 Unexpected end tag (s) in table context caused voodoo mode.
+Line: 1 Col: 50 End tag (s) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 58 Unexpected end tag (blink) in table context caused voodoo mode.
+Line: 1 Col: 58 Unexpected end tag (blink). Ignored.
+Line: 1 Col: 63 Unexpected end tag (tt) in table context caused voodoo mode.
+Line: 1 Col: 63 End tag (tt) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 69 Unexpected end tag (pre) in table context caused voodoo mode.
+Line: 1 Col: 69 End tag (pre) seen too early. Expected other end tag.
+Line: 1 Col: 75 Unexpected end tag (big) in table context caused voodoo mode.
+Line: 1 Col: 75 End tag (big) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 83 Unexpected end tag (small) in table context caused voodoo mode.
+Line: 1 Col: 83 End tag (small) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 90 Unexpected end tag (font) in table context caused voodoo mode.
+Line: 1 Col: 90 End tag (font) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 99 Unexpected end tag (select) in table context caused voodoo mode.
+Line: 1 Col: 99 Unexpected end tag (select). Ignored.
+Line: 1 Col: 104 Unexpected end tag (h1) in table context caused voodoo mode.
+Line: 1 Col: 104 End tag (h1) seen too early. Expected other end tag.
+Line: 1 Col: 109 Unexpected end tag (h2) in table context caused voodoo mode.
+Line: 1 Col: 109 End tag (h2) seen too early. Expected other end tag.
+Line: 1 Col: 114 Unexpected end tag (h3) in table context caused voodoo mode.
+Line: 1 Col: 114 End tag (h3) seen too early. Expected other end tag.
+Line: 1 Col: 119 Unexpected end tag (h4) in table context caused voodoo mode.
+Line: 1 Col: 119 End tag (h4) seen too early. Expected other end tag.
+Line: 1 Col: 124 Unexpected end tag (h5) in table context caused voodoo mode.
+Line: 1 Col: 124 End tag (h5) seen too early. Expected other end tag.
+Line: 1 Col: 129 Unexpected end tag (h6) in table context caused voodoo mode.
+Line: 1 Col: 129 End tag (h6) seen too early. Expected other end tag.
+Line: 1 Col: 136 Unexpected end tag (body) in the table row phase. Ignored.
+Line: 1 Col: 141 Unexpected end tag (br) in table context caused voodoo mode.
+Line: 1 Col: 141 Unexpected end tag (br). Treated as br element.
+Line: 1 Col: 145 Unexpected end tag (a) in table context caused voodoo mode.
+Line: 1 Col: 145 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 151 Unexpected end tag (img) in table context caused voodoo mode.
+Line: 1 Col: 151 This element (img) has no end tag.
+Line: 1 Col: 159 Unexpected end tag (title) in table context caused voodoo mode.
+Line: 1 Col: 159 Unexpected end tag (title). Ignored.
+Line: 1 Col: 166 Unexpected end tag (span) in table context caused voodoo mode.
+Line: 1 Col: 166 Unexpected end tag (span). Ignored.
+Line: 1 Col: 174 Unexpected end tag (style) in table context caused voodoo mode.
+Line: 1 Col: 174 Unexpected end tag (style). Ignored.
+Line: 1 Col: 183 Unexpected end tag (script) in table context caused voodoo mode.
+Line: 1 Col: 183 Unexpected end tag (script). Ignored.
+Line: 1 Col: 196 Unexpected end tag (th). Ignored.
+Line: 1 Col: 201 Unexpected end tag (td). Ignored.
+Line: 1 Col: 206 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 214 This element (frame) has no end tag.
+Line: 1 Col: 221 This element (area) has no end tag.
+Line: 1 Col: 228 Unexpected end tag (link). Ignored.
+Line: 1 Col: 236 This element (param) has no end tag.
+Line: 1 Col: 241 This element (hr) has no end tag.
+Line: 1 Col: 249 This element (input) has no end tag.
+Line: 1 Col: 255 Unexpected end tag (col). Ignored.
+Line: 1 Col: 262 Unexpected end tag (base). Ignored.
+Line: 1 Col: 269 Unexpected end tag (meta). Ignored.
+Line: 1 Col: 280 This element (basefont) has no end tag.
+Line: 1 Col: 290 This element (bgsound) has no end tag.
+Line: 1 Col: 298 This element (embed) has no end tag.
+Line: 1 Col: 307 This element (spacer) has no end tag.
+Line: 1 Col: 311 Unexpected end tag (p). Ignored.
+Line: 1 Col: 316 End tag (dd) seen too early. Expected other end tag.
+Line: 1 Col: 321 End tag (dt) seen too early. Expected other end tag.
+Line: 1 Col: 331 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 342 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 350 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 358 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 366 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 376 End tag (address) seen too early. Expected other end tag.
+Line: 1 Col: 389 End tag (blockquote) seen too early. Expected other end tag.
+Line: 1 Col: 398 End tag (center) seen too early. Expected other end tag.
+Line: 1 Col: 404 Unexpected end tag (dir). Ignored.
+Line: 1 Col: 410 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 415 End tag (dl) seen too early. Expected other end tag.
+Line: 1 Col: 426 End tag (fieldset) seen too early. Expected other end tag.
+Line: 1 Col: 436 End tag (listing) seen too early. Expected other end tag.
+Line: 1 Col: 443 End tag (menu) seen too early. Expected other end tag.
+Line: 1 Col: 448 End tag (ol) seen too early. Expected other end tag.
+Line: 1 Col: 453 End tag (ul) seen too early. Expected other end tag.
+Line: 1 Col: 458 End tag (li) seen too early. Expected other end tag.
+Line: 1 Col: 465 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 471 This element (wbr) has no end tag.
+Line: 1 Col: 487 End tag (button) seen too early. Expected other end tag.
+Line: 1 Col: 497 End tag (marquee) seen too early. Expected other end tag.
+Line: 1 Col: 506 End tag (object) seen too early. Expected other end tag.
+Line: 1 Col: 524 Unexpected end tag (html). Ignored.
+Line: 1 Col: 524 Unexpected end tag (frameset). Ignored.
+Line: 1 Col: 531 Unexpected end tag (head). Ignored.
+Line: 1 Col: 540 Unexpected end tag (iframe). Ignored.
+Line: 1 Col: 548 This element (image) has no end tag.
+Line: 1 Col: 558 This element (isindex) has no end tag.
+Line: 1 Col: 568 Unexpected end tag (noembed). Ignored.
+Line: 1 Col: 579 Unexpected end tag (noframes). Ignored.
+Line: 1 Col: 590 Unexpected end tag (noscript). Ignored.
+Line: 1 Col: 601 Unexpected end tag (optgroup). Ignored.
+Line: 1 Col: 610 Unexpected end tag (option). Ignored.
+Line: 1 Col: 622 Unexpected end tag (plaintext). Ignored.
+Line: 1 Col: 633 Unexpected end tag (textarea). Ignored.
+#document
+|
+|
+|
+|
+|