diff --git a/bs4/__init__.py b/bs4/__init__.py
index af8c718d..80f6f684 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -17,7 +17,7 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
"""
__author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.1.0"
+__version__ = "4.1.3"
__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson"
__license__ = "MIT"
@@ -149,7 +149,7 @@ class BeautifulSoup(Tag):
features = self.DEFAULT_BUILDER_FEATURES
builder_class = builder_registry.lookup(*features)
if builder_class is None:
- raise ValueError(
+ raise FeatureNotFound(
"Couldn't find a tree builder with the features you "
"requested: %s. Do you need to install a parser library?"
% ",".join(features))
@@ -208,10 +208,10 @@ class BeautifulSoup(Tag):
return navigable
def insert_before(self, successor):
- raise ValueError("BeautifulSoup objects don't support insert_before().")
+ raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
def insert_after(self, successor):
- raise ValueError("BeautifulSoup objects don't support insert_after().")
+ raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
def popTag(self):
tag = self.tagStack.pop()
@@ -267,7 +267,7 @@ class BeautifulSoup(Tag):
for i in range(len(self.tagStack) - 1, 0, -1):
if (name == self.tagStack[i].name
- and nsprefix == self.tagStack[i].nsprefix == nsprefix):
+ and nsprefix == self.tagStack[i].prefix):
numPops = len(self.tagStack) - i
break
if not inclusivePop:
@@ -348,6 +348,10 @@ class StopParsing(Exception):
pass
+class FeatureNotFound(ValueError):
+ pass
+
+
#By default, act as an HTML pretty-printer.
if __name__ == '__main__':
import sys
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 4c22b864..dc7deb93 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -157,7 +157,16 @@ class TreeBuilder(object):
# value is a whitespace-separated list of CSS
# classes. Split it into a list.
value = attrs[cdata_list_attr]
- values = whitespace_re.split(value)
+ if isinstance(value, basestring):
+ values = whitespace_re.split(value)
+ else:
+ # html5lib sometimes calls setAttributes twice
+ # for the same tag when rearranging the parse
+ # tree. On the second call the attribute value
+ # here is already a list. If this happens,
+ # leave the value alone rather than trying to
+ # split it again.
+ values = value
attrs[cdata_list_attr] = values
return attrs
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index c78fdff6..f6b91ff5 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -111,14 +111,34 @@ class LXMLTreeBuilderForXML(TreeBuilder):
attribute = NamespacedAttribute(
"xmlns", prefix, "http://www.w3.org/2000/xmlns/")
attrs[attribute] = namespace
+
+ if self.nsmaps is not None and len(self.nsmaps) > 0:
+ # Namespaces are in play. Find any attributes that came in
+ # from lxml with namespaces attached to their names, and
+ # turn them into NamespacedAttribute objects.
+ new_attrs = {}
+ for attr, value in attrs.items():
+ namespace, attr = self._getNsTag(attr)
+ if namespace is None:
+ new_attrs[attr] = value
+ else:
+ nsprefix = self._prefix_for_namespace(namespace)
+ attr = NamespacedAttribute(nsprefix, attr, namespace)
+ new_attrs[attr] = value
+ attrs = new_attrs
+
namespace, name = self._getNsTag(name)
- if namespace is not None:
- for inverted_nsmap in reversed(self.nsmaps):
- if inverted_nsmap is not None and namespace in inverted_nsmap:
- nsprefix = inverted_nsmap[namespace]
- break
+ nsprefix = self._prefix_for_namespace(namespace)
self.soup.handle_starttag(name, namespace, nsprefix, attrs)
+ def _prefix_for_namespace(self, namespace):
+ """Find the currently active prefix for the given namespace."""
+ if namespace is None:
+ return None
+ for inverted_nsmap in reversed(self.nsmaps):
+ if inverted_nsmap is not None and namespace in inverted_nsmap:
+ return inverted_nsmap[namespace]
+
def end(self, name):
self.soup.endData()
completed_tag = self.soup.tagStack[-1]
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 58cad9ba..983ade0f 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -10,18 +10,30 @@ encoding; that's the tree builder's job.
import codecs
from htmlentitydefs import codepoint2name
import re
-import warnings
+import logging
-# Autodetects character encodings. Very useful.
-# Download from http://chardet.feedparser.org/
-# or 'apt-get install python-chardet'
-# or 'easy_install chardet'
+# Import a library to autodetect character encodings.
+chardet_type = None
try:
- import chardet
- #import chardet.constants
- #chardet.constants._debug = 1
+ # First try the fast C implementation.
+ # PyPI package: cchardet
+ import cchardet
+ def chardet_dammit(s):
+ return cchardet.detect(s)['encoding']
except ImportError:
- chardet = None
+ try:
+ # Fall back to the pure Python implementation
+ # Debian package: python-chardet
+ # PyPI package: chardet
+ import chardet
+ def chardet_dammit(s):
+ return chardet.detect(s)['encoding']
+ #import chardet.constants
+ #chardet.constants._debug = 1
+ except ImportError:
+ # No chardet available.
+ def chardet_dammit(s):
+ return None
# Available from http://cjkpython.i18n.org/.
try:
@@ -207,8 +219,8 @@ class UnicodeDammit:
break
# If no luck and we have auto-detection library, try that:
- if not u and chardet and not isinstance(self.markup, unicode):
- u = self._convert_from(chardet.detect(self.markup)['encoding'])
+ if not u and not isinstance(self.markup, unicode):
+ u = self._convert_from(chardet_dammit(self.markup))
# As a last resort, try utf-8 and windows-1252:
if not u:
@@ -226,10 +238,9 @@ class UnicodeDammit:
if proposed_encoding != "ascii":
u = self._convert_from(proposed_encoding, "replace")
if u is not None:
- warnings.warn(
- UnicodeWarning(
+ logging.warning(
"Some characters could not be decoded, and were "
- "replaced with REPLACEMENT CHARACTER."))
+ "replaced with REPLACEMENT CHARACTER.")
self.contains_replacement_characters = True
break
diff --git a/bs4/element.py b/bs4/element.py
index 91a40078..26422fda 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -820,7 +820,7 @@ class Tag(PageElement):
for string in self._all_strings(True):
yield string
- def get_text(self, separator="", strip=False):
+ def get_text(self, separator=u"", strip=False):
"""
Get all child strings, concatenated using the given separator.
"""
@@ -987,7 +987,7 @@ class Tag(PageElement):
if isinstance(val, list) or isinstance(val, tuple):
val = ' '.join(val)
elif not isinstance(val, basestring):
- val = str(val)
+ val = unicode(val)
elif (
isinstance(val, AttributeValueWithCharsetSubstitution)
and eventual_encoding is not None):
@@ -995,20 +995,21 @@ class Tag(PageElement):
text = self.format_string(val, formatter)
decoded = (
- str(key) + '='
+ unicode(key) + '='
+ EntitySubstitution.quoted_attribute_value(text))
attrs.append(decoded)
close = ''
closeTag = ''
- if self.is_empty_element:
- close = '/'
- else:
- closeTag = '%s>' % self.name
prefix = ''
if self.prefix:
prefix = self.prefix + ":"
+ if self.is_empty_element:
+ close = '/'
+ else:
+ closeTag = '%s%s>' % (prefix, self.name)
+
pretty_print = (indent_level is not None)
if pretty_print:
space = (' ' * (indent_level - 1))
@@ -1120,6 +1121,7 @@ class Tag(PageElement):
callable that takes a string and returns whether or not the
string matches for some custom definition of 'matches'. The
same is true of the tag name."""
+
generator = self.descendants
if not recursive:
generator = self.children
@@ -1168,6 +1170,12 @@ class SoupStrainer(object):
kwargs['class'] = attrs
attrs = None
+ if 'class_' in kwargs:
+ # Treat class_="foo" as a search for the 'class'
+ # attribute, overriding any non-dict value for attrs.
+ kwargs['class'] = kwargs['class_']
+ del kwargs['class_']
+
if kwargs:
if attrs:
attrs = attrs.copy()
diff --git a/bs4/testing.py b/bs4/testing.py
index 5a84b0ba..30e74f42 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -202,6 +202,14 @@ class HTMLTreeBuilderSmokeTest(object):
"
Bar
"
"
Baz
")
+ def test_deeply_nested_multivalued_attribute(self):
+ # html5lib can set the attributes of the same tag many times
+ # as it rearranges the tree. This has caused problems with
+ # multivalued attributes.
+ markup = '
"""
- soup = self.soup(doc)
- second_para = soup.find(id='2')
- bold = soup.b
-
- # Move the tag to the end of the second paragraph.
- soup.find(id='2').append(soup.b)
-
- # The tag is now a child of the second paragraph.
- self.assertEqual(bold.parent, second_para)
-
- self.assertEqual(
- soup.decode(), self.document_for(
- '
")
- # clear using extract()
- a = soup.a
- soup.p.clear()
- self.assertEqual(len(soup.p.contents), 0)
- self.assertTrue(hasattr(a, "contents"))
-
- # clear using decompose()
- em = a.em
- a.clear(decompose=True)
- self.assertFalse(hasattr(em, "contents"))
-
- def test_string_set(self):
- """Tag.string = 'string'"""
- soup = self.soup("")
- soup.a.string = "foo"
- self.assertEqual(soup.a.contents, ["foo"])
- soup.b.string = "bar"
- self.assertEqual(soup.b.contents, ["bar"])
-
- def test_string_set_does_not_affect_original_string(self):
- soup = self.soup("foobar")
- soup.b.string = soup.c.string
- self.assertEqual(soup.a.encode(), b"barbar")
-
- def test_set_string_preserves_class_of_string(self):
- soup = self.soup("")
- cdata = CData("foo")
- soup.a.string = cdata
- self.assertTrue(isinstance(soup.a.string, CData))
-
-class TestElementObjects(SoupTest):
- """Test various features of element objects."""
-
- def test_len(self):
- """The length of an element is its number of children."""
- soup = self.soup("123")
-
- # The BeautifulSoup object itself contains one element: the
- # tag.
- self.assertEqual(len(soup.contents), 1)
- self.assertEqual(len(soup), 1)
-
- # The tag contains three elements: the text node "1", the
- # tag, and the text node "3".
- self.assertEqual(len(soup.top), 3)
- self.assertEqual(len(soup.top.contents), 3)
-
- def test_member_access_invokes_find(self):
- """Accessing a Python member .foo invokes find('foo')"""
- soup = self.soup('')
- self.assertEqual(soup.b, soup.find('b'))
- self.assertEqual(soup.b.i, soup.find('b').find('i'))
- self.assertEqual(soup.a, None)
-
- def test_deprecated_member_access(self):
- soup = self.soup('')
- with warnings.catch_warnings(record=True) as w:
- tag = soup.bTag
- self.assertEqual(soup.b, tag)
- self.assertEqual(
- '.bTag is deprecated, use .find("b") instead.',
- str(w[0].message))
-
- def test_has_attr(self):
- """has_attr() checks for the presence of an attribute.
-
- Please note note: has_attr() is different from
- __in__. has_attr() checks the tag's attributes and __in__
- checks the tag's chidlren.
- """
- soup = self.soup("")
- self.assertTrue(soup.foo.has_attr('attr'))
- self.assertFalse(soup.foo.has_attr('attr2'))
-
-
- def test_attributes_come_out_in_alphabetical_order(self):
- markup = ''
- self.assertSoupEquals(markup, '')
-
- def test_string(self):
- # A tag that contains only a text node makes that node
- # available as .string.
- soup = self.soup("foo")
- self.assertEqual(soup.b.string, 'foo')
-
- def test_empty_tag_has_no_string(self):
- # A tag with no children has no .stirng.
- soup = self.soup("")
- self.assertEqual(soup.b.string, None)
-
- def test_tag_with_multiple_children_has_no_string(self):
- # A tag with no children has no .string.
- soup = self.soup("foo")
- self.assertEqual(soup.b.string, None)
-
- soup = self.soup("foobar")
- self.assertEqual(soup.b.string, None)
-
- # Even if all the children are strings, due to trickery,
- # it won't work--but this would be a good optimization.
- soup = self.soup("foo")
- soup.a.insert(1, "bar")
- self.assertEqual(soup.a.string, None)
-
- def test_tag_with_recursive_string_has_string(self):
- # A tag with a single child which has a .string inherits that
- # .string.
- soup = self.soup("foo")
- self.assertEqual(soup.a.string, "foo")
- self.assertEqual(soup.string, "foo")
-
- def test_lack_of_string(self):
- """Only a tag containing a single text node has a .string."""
- soup = self.soup("feo")
- self.assertFalse(soup.b.string)
-
- soup = self.soup("")
- self.assertFalse(soup.b.string)
-
- def test_all_text(self):
- """Tag.text and Tag.get_text(sep=u"") -> all child text, concatenated"""
- soup = self.soup("ar t ")
- self.assertEqual(soup.a.text, "ar t ")
- self.assertEqual(soup.a.get_text(strip=True), "art")
- self.assertEqual(soup.a.get_text(","), "a,r, , t ")
- self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")
-
-class TestCDAtaListAttributes(SoupTest):
-
- """Testing cdata-list attributes like 'class'.
- """
- def test_single_value_becomes_list(self):
- soup = self.soup("")
- self.assertEqual(["foo"],soup.a['class'])
-
- def test_multiple_values_becomes_list(self):
- soup = self.soup("")
- self.assertEqual(["foo", "bar"], soup.a['class'])
-
- def test_multiple_values_separated_by_weird_whitespace(self):
- soup = self.soup("")
- self.assertEqual(["foo", "bar", "baz"],soup.a['class'])
-
- def test_attributes_joined_into_string_on_output(self):
- soup = self.soup("")
- self.assertEqual(b'', soup.a.encode())
-
- def test_accept_charset(self):
- soup = self.soup('
+#errors
+Line: 1 Col: 9 Unexpected end tag (strong). Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected end tag (strong) after the (implied) root element.
+Line: 1 Col: 13 Unexpected end tag (b) after the (implied) root element.
+Line: 1 Col: 18 Unexpected end tag (em) after the (implied) root element.
+Line: 1 Col: 22 Unexpected end tag (i) after the (implied) root element.
+Line: 1 Col: 26 Unexpected end tag (u) after the (implied) root element.
+Line: 1 Col: 35 Unexpected end tag (strike) after the (implied) root element.
+Line: 1 Col: 39 Unexpected end tag (s) after the (implied) root element.
+Line: 1 Col: 47 Unexpected end tag (blink) after the (implied) root element.
+Line: 1 Col: 52 Unexpected end tag (tt) after the (implied) root element.
+Line: 1 Col: 58 Unexpected end tag (pre) after the (implied) root element.
+Line: 1 Col: 64 Unexpected end tag (big) after the (implied) root element.
+Line: 1 Col: 72 Unexpected end tag (small) after the (implied) root element.
+Line: 1 Col: 79 Unexpected end tag (font) after the (implied) root element.
+Line: 1 Col: 88 Unexpected end tag (select) after the (implied) root element.
+Line: 1 Col: 93 Unexpected end tag (h1) after the (implied) root element.
+Line: 1 Col: 98 Unexpected end tag (h2) after the (implied) root element.
+Line: 1 Col: 103 Unexpected end tag (h3) after the (implied) root element.
+Line: 1 Col: 108 Unexpected end tag (h4) after the (implied) root element.
+Line: 1 Col: 113 Unexpected end tag (h5) after the (implied) root element.
+Line: 1 Col: 118 Unexpected end tag (h6) after the (implied) root element.
+Line: 1 Col: 125 Unexpected end tag (body) after the (implied) root element.
+Line: 1 Col: 130 Unexpected end tag (br). Treated as br element.
+Line: 1 Col: 134 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 140 This element (img) has no end tag.
+Line: 1 Col: 148 Unexpected end tag (title). Ignored.
+Line: 1 Col: 155 Unexpected end tag (span). Ignored.
+Line: 1 Col: 163 Unexpected end tag (style). Ignored.
+Line: 1 Col: 172 Unexpected end tag (script). Ignored.
+Line: 1 Col: 180 Unexpected end tag (table). Ignored.
+Line: 1 Col: 185 Unexpected end tag (th). Ignored.
+Line: 1 Col: 190 Unexpected end tag (td). Ignored.
+Line: 1 Col: 195 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 203 This element (frame) has no end tag.
+Line: 1 Col: 210 This element (area) has no end tag.
+Line: 1 Col: 217 Unexpected end tag (link). Ignored.
+Line: 1 Col: 225 This element (param) has no end tag.
+Line: 1 Col: 230 This element (hr) has no end tag.
+Line: 1 Col: 238 This element (input) has no end tag.
+Line: 1 Col: 244 Unexpected end tag (col). Ignored.
+Line: 1 Col: 251 Unexpected end tag (base). Ignored.
+Line: 1 Col: 258 Unexpected end tag (meta). Ignored.
+Line: 1 Col: 269 This element (basefont) has no end tag.
+Line: 1 Col: 279 This element (bgsound) has no end tag.
+Line: 1 Col: 287 This element (embed) has no end tag.
+Line: 1 Col: 296 This element (spacer) has no end tag.
+Line: 1 Col: 300 Unexpected end tag (p). Ignored.
+Line: 1 Col: 305 End tag (dd) seen too early. Expected other end tag.
+Line: 1 Col: 310 End tag (dt) seen too early. Expected other end tag.
+Line: 1 Col: 320 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 331 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 339 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 347 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 355 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 365 End tag (address) seen too early. Expected other end tag.
+Line: 1 Col: 378 End tag (blockquote) seen too early. Expected other end tag.
+Line: 1 Col: 387 End tag (center) seen too early. Expected other end tag.
+Line: 1 Col: 393 Unexpected end tag (dir). Ignored.
+Line: 1 Col: 399 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 404 End tag (dl) seen too early. Expected other end tag.
+Line: 1 Col: 415 End tag (fieldset) seen too early. Expected other end tag.
+Line: 1 Col: 425 End tag (listing) seen too early. Expected other end tag.
+Line: 1 Col: 432 End tag (menu) seen too early. Expected other end tag.
+Line: 1 Col: 437 End tag (ol) seen too early. Expected other end tag.
+Line: 1 Col: 442 End tag (ul) seen too early. Expected other end tag.
+Line: 1 Col: 447 End tag (li) seen too early. Expected other end tag.
+Line: 1 Col: 454 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 460 This element (wbr) has no end tag.
+Line: 1 Col: 476 End tag (button) seen too early. Expected other end tag.
+Line: 1 Col: 486 End tag (marquee) seen too early. Expected other end tag.
+Line: 1 Col: 495 End tag (object) seen too early. Expected other end tag.
+Line: 1 Col: 513 Unexpected end tag (html). Ignored.
+Line: 1 Col: 513 Unexpected end tag (frameset). Ignored.
+Line: 1 Col: 520 Unexpected end tag (head). Ignored.
+Line: 1 Col: 529 Unexpected end tag (iframe). Ignored.
+Line: 1 Col: 537 This element (image) has no end tag.
+Line: 1 Col: 547 This element (isindex) has no end tag.
+Line: 1 Col: 557 Unexpected end tag (noembed). Ignored.
+Line: 1 Col: 568 Unexpected end tag (noframes). Ignored.
+Line: 1 Col: 579 Unexpected end tag (noscript). Ignored.
+Line: 1 Col: 590 Unexpected end tag (optgroup). Ignored.
+Line: 1 Col: 599 Unexpected end tag (option). Ignored.
+Line: 1 Col: 611 Unexpected end tag (plaintext). Ignored.
+Line: 1 Col: 622 Unexpected end tag (textarea). Ignored.
+#document
+|
+|
+|
+|
+|
+
+#data
+
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (strong) in table context caused voodoo mode.
+Line: 1 Col: 20 End tag (strong) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 24 Unexpected end tag (b) in table context caused voodoo mode.
+Line: 1 Col: 24 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 29 Unexpected end tag (em) in table context caused voodoo mode.
+Line: 1 Col: 29 End tag (em) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 33 Unexpected end tag (i) in table context caused voodoo mode.
+Line: 1 Col: 33 End tag (i) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 37 Unexpected end tag (u) in table context caused voodoo mode.
+Line: 1 Col: 37 End tag (u) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 46 Unexpected end tag (strike) in table context caused voodoo mode.
+Line: 1 Col: 46 End tag (strike) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 50 Unexpected end tag (s) in table context caused voodoo mode.
+Line: 1 Col: 50 End tag (s) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 58 Unexpected end tag (blink) in table context caused voodoo mode.
+Line: 1 Col: 58 Unexpected end tag (blink). Ignored.
+Line: 1 Col: 63 Unexpected end tag (tt) in table context caused voodoo mode.
+Line: 1 Col: 63 End tag (tt) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 69 Unexpected end tag (pre) in table context caused voodoo mode.
+Line: 1 Col: 69 End tag (pre) seen too early. Expected other end tag.
+Line: 1 Col: 75 Unexpected end tag (big) in table context caused voodoo mode.
+Line: 1 Col: 75 End tag (big) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 83 Unexpected end tag (small) in table context caused voodoo mode.
+Line: 1 Col: 83 End tag (small) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 90 Unexpected end tag (font) in table context caused voodoo mode.
+Line: 1 Col: 90 End tag (font) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 99 Unexpected end tag (select) in table context caused voodoo mode.
+Line: 1 Col: 99 Unexpected end tag (select). Ignored.
+Line: 1 Col: 104 Unexpected end tag (h1) in table context caused voodoo mode.
+Line: 1 Col: 104 End tag (h1) seen too early. Expected other end tag.
+Line: 1 Col: 109 Unexpected end tag (h2) in table context caused voodoo mode.
+Line: 1 Col: 109 End tag (h2) seen too early. Expected other end tag.
+Line: 1 Col: 114 Unexpected end tag (h3) in table context caused voodoo mode.
+Line: 1 Col: 114 End tag (h3) seen too early. Expected other end tag.
+Line: 1 Col: 119 Unexpected end tag (h4) in table context caused voodoo mode.
+Line: 1 Col: 119 End tag (h4) seen too early. Expected other end tag.
+Line: 1 Col: 124 Unexpected end tag (h5) in table context caused voodoo mode.
+Line: 1 Col: 124 End tag (h5) seen too early. Expected other end tag.
+Line: 1 Col: 129 Unexpected end tag (h6) in table context caused voodoo mode.
+Line: 1 Col: 129 End tag (h6) seen too early. Expected other end tag.
+Line: 1 Col: 136 Unexpected end tag (body) in the table row phase. Ignored.
+Line: 1 Col: 141 Unexpected end tag (br) in table context caused voodoo mode.
+Line: 1 Col: 141 Unexpected end tag (br). Treated as br element.
+Line: 1 Col: 145 Unexpected end tag (a) in table context caused voodoo mode.
+Line: 1 Col: 145 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 151 Unexpected end tag (img) in table context caused voodoo mode.
+Line: 1 Col: 151 This element (img) has no end tag.
+Line: 1 Col: 159 Unexpected end tag (title) in table context caused voodoo mode.
+Line: 1 Col: 159 Unexpected end tag (title). Ignored.
+Line: 1 Col: 166 Unexpected end tag (span) in table context caused voodoo mode.
+Line: 1 Col: 166 Unexpected end tag (span). Ignored.
+Line: 1 Col: 174 Unexpected end tag (style) in table context caused voodoo mode.
+Line: 1 Col: 174 Unexpected end tag (style). Ignored.
+Line: 1 Col: 183 Unexpected end tag (script) in table context caused voodoo mode.
+Line: 1 Col: 183 Unexpected end tag (script). Ignored.
+Line: 1 Col: 196 Unexpected end tag (th). Ignored.
+Line: 1 Col: 201 Unexpected end tag (td). Ignored.
+Line: 1 Col: 206 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 214 This element (frame) has no end tag.
+Line: 1 Col: 221 This element (area) has no end tag.
+Line: 1 Col: 228 Unexpected end tag (link). Ignored.
+Line: 1 Col: 236 This element (param) has no end tag.
+Line: 1 Col: 241 This element (hr) has no end tag.
+Line: 1 Col: 249 This element (input) has no end tag.
+Line: 1 Col: 255 Unexpected end tag (col). Ignored.
+Line: 1 Col: 262 Unexpected end tag (base). Ignored.
+Line: 1 Col: 269 Unexpected end tag (meta). Ignored.
+Line: 1 Col: 280 This element (basefont) has no end tag.
+Line: 1 Col: 290 This element (bgsound) has no end tag.
+Line: 1 Col: 298 This element (embed) has no end tag.
+Line: 1 Col: 307 This element (spacer) has no end tag.
+Line: 1 Col: 311 Unexpected end tag (p). Ignored.
+Line: 1 Col: 316 End tag (dd) seen too early. Expected other end tag.
+Line: 1 Col: 321 End tag (dt) seen too early. Expected other end tag.
+Line: 1 Col: 331 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 342 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 350 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 358 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 366 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 376 End tag (address) seen too early. Expected other end tag.
+Line: 1 Col: 389 End tag (blockquote) seen too early. Expected other end tag.
+Line: 1 Col: 398 End tag (center) seen too early. Expected other end tag.
+Line: 1 Col: 404 Unexpected end tag (dir). Ignored.
+Line: 1 Col: 410 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 415 End tag (dl) seen too early. Expected other end tag.
+Line: 1 Col: 426 End tag (fieldset) seen too early. Expected other end tag.
+Line: 1 Col: 436 End tag (listing) seen too early. Expected other end tag.
+Line: 1 Col: 443 End tag (menu) seen too early. Expected other end tag.
+Line: 1 Col: 448 End tag (ol) seen too early. Expected other end tag.
+Line: 1 Col: 453 End tag (ul) seen too early. Expected other end tag.
+Line: 1 Col: 458 End tag (li) seen too early. Expected other end tag.
+Line: 1 Col: 465 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 471 This element (wbr) has no end tag.
+Line: 1 Col: 487 End tag (button) seen too early. Expected other end tag.
+Line: 1 Col: 497 End tag (marquee) seen too early. Expected other end tag.
+Line: 1 Col: 506 End tag (object) seen too early. Expected other end tag.
+Line: 1 Col: 524 Unexpected end tag (html). Ignored.
+Line: 1 Col: 524 Unexpected end tag (frameset). Ignored.
+Line: 1 Col: 531 Unexpected end tag (head). Ignored.
+Line: 1 Col: 540 Unexpected end tag (iframe). Ignored.
+Line: 1 Col: 548 This element (image) has no end tag.
+Line: 1 Col: 558 This element (isindex) has no end tag.
+Line: 1 Col: 568 Unexpected end tag (noembed). Ignored.
+Line: 1 Col: 579 Unexpected end tag (noframes). Ignored.
+Line: 1 Col: 590 Unexpected end tag (noscript). Ignored.
+Line: 1 Col: 601 Unexpected end tag (optgroup). Ignored.
+Line: 1 Col: 610 Unexpected end tag (option). Ignored.
+Line: 1 Col: 622 Unexpected end tag (plaintext). Ignored.
+Line: 1 Col: 633 Unexpected end tag (textarea). Ignored.
+#document
+|
+|
+|
+|
+|