diff --git a/lib/bs4/__init__.py b/lib/bs4/__init__.py
index 2a436d34..d8ad5e1d 100644
--- a/lib/bs4/__init__.py
+++ b/lib/bs4/__init__.py
@@ -7,7 +7,7 @@ Beautiful Soup uses a pluggable XML or HTML parser to parse a
 provides methods and Pythonic idioms that make it easy to navigate,
 search, and modify the parse tree.
 
-Beautiful Soup works with Python 3.5 and up. It works better if lxml
+Beautiful Soup works with Python 3.6 and up. It works better if lxml
 and/or html5lib is installed.
 
 For more than you ever wanted to know about Beautiful Soup, see the
@@ -15,14 +15,13 @@ documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.10.0"
-__copyright__ = "Copyright (c) 2004-2021 Leonard Richardson"
+__version__ = "4.12.3"
+__copyright__ = "Copyright (c) 2004-2024 Leonard Richardson"
 # Use of this source code is governed by the MIT license.
 __license__ = "MIT"
 
 __all__ = ['BeautifulSoup']
 
-
 from collections import Counter
 import os
 import re
@@ -35,11 +34,17 @@ import warnings
 if sys.version_info.major < 3:
     raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.')
 
-from .builder import builder_registry, ParserRejectedMarkup
+from .builder import (
+    builder_registry,
+    ParserRejectedMarkup,
+    XMLParsedAsHTMLWarning,
+    HTMLParserTreeBuilder
+)
 from .dammit import UnicodeDammit
 from .element import (
     CData,
     Comment,
+    CSS,
     DEFAULT_OUTPUT_ENCODING,
     Declaration,
     Doctype,
@@ -67,7 +72,7 @@ class MarkupResemblesLocatorWarning(UserWarning):
     on disk.
     """
 
-
+   
 class BeautifulSoup(Tag):
     """A data structure representing a parsed HTML or XML document.
 
@@ -113,7 +118,7 @@ class BeautifulSoup(Tag):
     ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
 
     NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
-    
+   
     def __init__(self, markup="", features=None, builder=None,
                  parse_only=None, from_encoding=None, exclude_encodings=None,
                  element_classes=None, **kwargs):
@@ -207,10 +212,10 @@ class BeautifulSoup(Tag):
             if old_name in kwargs:
                 warnings.warn(
                     'The "%s" argument to the BeautifulSoup constructor '
-                    'has been renamed to "%s."' % (old_name, new_name))
-                value = kwargs[old_name]
-                del kwargs[old_name]
-                return value
+                    'has been renamed to "%s."' % (old_name, new_name),
+                    DeprecationWarning, stacklevel=3
+                )
+                return kwargs.pop(old_name)
             return None
 
         parse_only = parse_only or deprecated_argument(
@@ -305,51 +310,18 @@ class BeautifulSoup(Tag):
         self._namespaces = dict()
         self.parse_only = parse_only
 
-        self.builder.initialize_soup(self)
-
         if hasattr(markup, 'read'):        # It's a file-type object.
             markup = markup.read()
         elif len(markup) <= 256 and (
                 (isinstance(markup, bytes) and not b'<' in markup)
                 or (isinstance(markup, str) and not '<' in markup)
         ):
-            # Print out warnings for a couple beginner problems
+            # Issue warnings for a couple beginner problems
             # involving passing non-markup to Beautiful Soup.
             # Beautiful Soup will still parse the input as markup,
-            # just in case that's what the user really wants.
-            if (isinstance(markup, str)
-                and not os.path.supports_unicode_filenames):
-                possible_filename = markup.encode("utf8")
-            else:
-                possible_filename = markup
-            is_file = False
-            is_directory = False
-            try:
-                is_file = os.path.exists(possible_filename)
-                if is_file:
-                    is_directory = os.path.isdir(possible_filename)
-            except Exception as e:
-                # This is almost certainly a problem involving
-                # characters not valid in filenames on this
-                # system. Just let it go.
-                pass
-            if is_directory:
-                warnings.warn(
-                    '"%s" looks like a directory name, not markup. You may'
-                    ' want to open a file found in this directory and pass'
-                    ' the filehandle into Beautiful Soup.' % (
-                        self._decode_markup(markup)
-                    ),
-                    MarkupResemblesLocatorWarning
-                )
-            elif is_file:
-                warnings.warn(
-                    '"%s" looks like a filename, not markup. You should'
-                    ' probably open this file and pass the filehandle into'
-                    ' Beautiful Soup.' % self._decode_markup(markup),
-                    MarkupResemblesLocatorWarning
-                )
-            self._check_markup_is_url(markup)
+            # since that is sometimes the intended behavior.
+            if not self._markup_is_url(markup):
+                self._markup_resembles_filename(markup)                
 
         rejections = []
         success = False
@@ -358,6 +330,7 @@ class BeautifulSoup(Tag):
              self.builder.prepare_markup(
                  markup, from_encoding, exclude_encodings=exclude_encodings)):
             self.reset()
+            self.builder.initialize_soup(self)
             try:
                 self._feed()
                 success = True
@@ -377,26 +350,50 @@ class BeautifulSoup(Tag):
         self.markup = None
         self.builder.soup = None
 
-    def __copy__(self):
-        """Copy a BeautifulSoup object by converting the document to a string and parsing it again."""
-        copy = type(self)(
-            self.encode('utf-8'), builder=self.builder, from_encoding='utf-8'
-        )
+    def _clone(self):
+        """Create a new BeautifulSoup object with the same TreeBuilder,
+        but not associated with any markup.
 
-        # Although we encoded the tree to UTF-8, that may not have
-        # been the encoding of the original markup. Set the copy's
-        # .original_encoding to reflect the original object's
-        # .original_encoding.
-        copy.original_encoding = self.original_encoding
-        return copy
+        This is the first step of the deepcopy process.
+        """
+        clone = type(self)("", None, self.builder)  # markup="", features=None
 
+        # Keep track of the encoding of the original document,
+        # since we won't be parsing it again.
+        clone.original_encoding = self.original_encoding
+        return clone
+        
     def __getstate__(self):
         # Frequently a tree builder can't be pickled.
         d = dict(self.__dict__)
-        if 'builder' in d and not self.builder.picklable:
-            d['builder'] = None
+        if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
+            d['builder'] = type(self.builder)  # pickle the class, not the instance
+        # Store the tree as a Unicode string that __setstate__ re-parses.
+        d['contents'] = []
+        d['markup'] = self.decode()
+
+        # If _most_recent_element is present, it's a Tag object left
+        # over from initial parse. It might not be picklable and we
+        # don't need it.
+        if '_most_recent_element' in d:
+            del d['_most_recent_element']
         return d
 
+    def __setstate__(self, state):
+        """Restore pickled state and re-parse the stored markup."""
+        self.__dict__ = state
+        if isinstance(self.builder, type):  # pickled as a class; instantiate it
+            self.builder = self.builder()
+        elif not self.builder:
+            # We don't know which builder was used to build this
+            # parse tree, so use a default we know is always available.
+            self.builder = HTMLParserTreeBuilder()
+        self.builder.soup = self
+        self.reset()
+        self._feed()
+        return state
+
+    
     @classmethod
     def _decode_markup(cls, markup):
         """Ensure `markup` is bytes so it's safe to send into warnings.warn.
@@ -411,11 +408,13 @@ class BeautifulSoup(Tag):
         return decoded
 
     @classmethod
-    def _check_markup_is_url(cls, markup):
+    def _markup_is_url(cls, markup):
         """Error-handling method to raise a warning if incoming markup looks
         like a URL.
 
         :param markup: A string.
+        :return: Whether or not the markup resembles a URL
+            closely enough to justify a warning.
         """
         if isinstance(markup, bytes):
             space = b' '
@@ -424,20 +423,51 @@ class BeautifulSoup(Tag):
             space = ' '
             cant_start_with = ("http:", "https:")
         else:
-            return
+            return False
 
         if any(markup.startswith(prefix) for prefix in cant_start_with):
             if not space in markup:
                 warnings.warn(
-                    '"%s" looks like a URL. Beautiful Soup is not an'
-                    ' HTTP client. You should probably use an HTTP client like'
-                    ' requests to get the document behind the URL, and feed'
-                    ' that document to Beautiful Soup.' % cls._decode_markup(
-                        markup
-                    ),
-                    MarkupResemblesLocatorWarning
+                    'The input looks more like a URL than markup. You may want to use'
+                    ' an HTTP client like requests to get the document behind'
+                    ' the URL, and feed that document to Beautiful Soup.',
+                    MarkupResemblesLocatorWarning,
+                    stacklevel=3
                 )
+                return True
+        return False
 
+    @classmethod
+    def _markup_resembles_filename(cls, markup):
+        """Warn the caller if incoming markup resembles a filename.
+
+        Markup counts as filename-like when it contains a path
+        separator or ends (case-insensitively) with one of a few
+        common document extensions.
+
+        :param markup: A bytestring or string.
+        :return: True if a MarkupResemblesLocatorWarning was issued,
+            False otherwise.
+        """
+        separators = '/\\'
+        suffixes = ('.html', '.htm', '.xml', '.xhtml', '.txt')
+        if isinstance(markup, bytes):
+            # Compare bytes against bytes.
+            separators = separators.encode("utf8")
+            suffixes = tuple(s.encode('utf8') for s in suffixes)
+
+        has_separator = any(ch in markup for ch in separators)
+        if not has_separator and not markup.lower().endswith(suffixes):
+            return False
+
+        warnings.warn(
+            'The input looks more like a filename than markup. You may'
+            ' want to open this file and pass the filehandle into'
+            ' Beautiful Soup.',
+            MarkupResemblesLocatorWarning, stacklevel=3
+        )
+        return True
+    
     def _feed(self):
         """Internal method that parses previously set markup, creating a large
         number of Tag and NavigableString objects.
@@ -464,6 +494,7 @@ class BeautifulSoup(Tag):
         self.open_tag_counter = Counter()
         self.preserve_whitespace_tag_stack = []
         self.string_container_stack = []
+        self._most_recent_element = None
         self.pushTag(self)
 
     def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
@@ -689,7 +720,7 @@ class BeautifulSoup(Tag):
         return most_recently_popped
 
     def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
-                        sourcepos=None):
+                        sourcepos=None, namespaces=None):
         """Called by the tree builder when a new tag is encountered.
 
         :param name: Name of the tag.
@@ -699,6 +730,8 @@ class BeautifulSoup(Tag):
             source document.
         :param sourcepos: The character position within `sourceline` where this
             tag was found.
+        :param namespaces: A dictionary of all namespace prefix mappings 
+            currently in scope in the document.
 
         If this method returns None, the tag was rejected by an active
         SoupStrainer. You should proceed as if the tag had not occurred
@@ -716,7 +749,8 @@ class BeautifulSoup(Tag):
         tag = self.element_classes.get(Tag, Tag)(
             self, self.builder, name, namespace, nsprefix, attrs,
             self.currentTag, self._most_recent_element,
-            sourceline=sourceline, sourcepos=sourcepos
+            sourceline=sourceline, sourcepos=sourcepos,
+            namespaces=namespaces
         )
         if tag is None:
             return tag
@@ -735,14 +769,14 @@ class BeautifulSoup(Tag):
         #print("End tag: " + name)
         self.endData()
         self._popToTag(name, nsprefix)
-
+        
     def handle_data(self, data):
         """Called by the tree builder when a chunk of textual data is encountered."""
         self.current_data.append(data)
        
     def decode(self, pretty_print=False,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal", iterator=None):
         """Returns a string or Unicode representation of the parse tree
             as an HTML or XML document.
 
@@ -769,7 +803,7 @@ class BeautifulSoup(Tag):
         else:
             indent_level = 0
         return prefix + super(BeautifulSoup, self).decode(
-            indent_level, eventual_encoding, formatter)
+            indent_level, eventual_encoding, formatter, iterator)
 
 # Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
 _s = BeautifulSoup
@@ -782,7 +816,9 @@ class BeautifulStoneSoup(BeautifulSoup):
         kwargs['features'] = 'xml'
         warnings.warn(
             'The BeautifulStoneSoup class is deprecated. Instead of using '
-            'it, pass features="xml" into the BeautifulSoup constructor.')
+            'it, pass features="xml" into the BeautifulSoup constructor.',
+            DeprecationWarning, stacklevel=2
+        )
         super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
 
 
diff --git a/lib/bs4/builder/__init__.py b/lib/bs4/builder/__init__.py
index bd44905e..ffb31fc2 100644
--- a/lib/bs4/builder/__init__.py
+++ b/lib/bs4/builder/__init__.py
@@ -3,10 +3,14 @@ __license__ = "MIT"
 
 from collections import defaultdict
 import itertools
+import re
+import warnings
 import sys
 from bs4.element import (
     CharsetMetaAttributeValue,
     ContentMetaAttributeValue,
+    RubyParenthesisString,
+    RubyTextString,
     Stylesheet,
     Script,
     TemplateString,
@@ -28,6 +32,12 @@ XML = 'xml'
 HTML = 'html'
 HTML_5 = 'html5'
 
+class XMLParsedAsHTMLWarning(UserWarning):
+    """The warning issued when an HTML parser is used to parse
+    XML that is not XHTML. MESSAGE is the text of that warning.
+    """
+    MESSAGE = """It looks like you're parsing an XML document using an HTML parser. If this really is an HTML document (maybe it's XHTML?), you can ignore or filter this warning. If it's XML, you should know that using an XML parser will be more reliable. To parse this document as XML, make sure you have the lxml package installed, and pass the keyword argument `features="xml"` into the BeautifulSoup constructor."""
+
 
 class TreeBuilderRegistry(object):
     """A way of looking up TreeBuilder subclasses by their name or by desired
@@ -112,7 +122,7 @@ class TreeBuilder(object):
     
     # A value for these tag/attribute combinations is a space- or
     # comma-separated list of CDATA, rather than a single CDATA.
-    DEFAULT_CDATA_LIST_ATTRIBUTES = {}
+    DEFAULT_CDATA_LIST_ATTRIBUTES = defaultdict(list)
 
     # Whitespace should be preserved inside these tags.
     DEFAULT_PRESERVE_WHITESPACE_TAGS = set()
@@ -319,7 +329,7 @@ class TreeBuilder(object):
                         values = value
                     attrs[attr] = values
         return attrs
-
+    
 class SAXTreeBuilder(TreeBuilder):
     """A Beautiful Soup treebuilder that listens for SAX events.
 
@@ -390,17 +400,25 @@ class HTMLTreeBuilder(TreeBuilder):
     # you need to use it.
     block_elements = set(["address", "article", "aside", "blockquote", "canvas", "dd", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "noscript", "ol", "output", "p", "pre", "section", "table", "tfoot", "ul", "video"])
 
-    # The HTML standard defines an unusual content model for these tags.
-    # We represent this by using a string class other than NavigableString
-    # inside these tags.
+    # These HTML tags need special treatment so they can be
+    # represented by a string class other than NavigableString.
     #
-    # I made this list by going through the HTML spec
+    # For some of these tags, it's because the HTML standard defines
+    # an unusual content model for them. I made this list by going
+    # through the HTML spec
     # (https://html.spec.whatwg.org/#metadata-content) and looking for
     # "metadata content" elements that can contain strings.
     #
+    # The Ruby tags (<rt> and <rp>) are here despite being normal
+    # "phrasing content" tags, because the content they contain is
+    # qualitatively different from other text in the document, and it
+    # can be useful to be able to distinguish it.
+    #
     # TODO: Arguably <noscript> could go here but it seems
     # qualitatively different from the other tags.
     DEFAULT_STRING_CONTAINERS = {
+        'rt' : RubyTextString,
+        'rp' : RubyParenthesisString,
         'style': Stylesheet,
         'script': Script,
         'template': TemplateString,
@@ -431,7 +449,7 @@ class HTMLTreeBuilder(TreeBuilder):
         }
 
     DEFAULT_PRESERVE_WHITESPACE_TAGS = set(['pre', 'textarea'])
-    
+
     def set_up_substitutions(self, tag):
         """Replace the declared encoding in a <meta> tag with a placeholder,
         to be substituted when the tag is output to a string.
@@ -475,6 +493,104 @@ class HTMLTreeBuilder(TreeBuilder):
 
         return (meta_encoding is not None)
 
+class DetectsXMLParsedAsHTML(object):
+    """A mixin class for any class (a TreeBuilder, or some class used by a
+    TreeBuilder) that's in a position to detect whether an XML
+    document is being incorrectly parsed as HTML, and issue an
+    appropriate warning.
+
+    This requires being able to observe an incoming processing
+    instruction that might be an XML declaration, and also able to
+    observe tags as they're opened. If you can't do that for a given
+    TreeBuilder, there's a less reliable implementation based on
+    examining the raw markup.
+    """
+
+    # Regular expression for seeing if markup has an <html> tag. The
+    # character after '<' is optional, so the opening '<html' tag
+    # matches as well as '</html>'.
+    LOOKS_LIKE_HTML = re.compile("<[^ +]?html", re.I)
+    LOOKS_LIKE_HTML_B = re.compile(b"<[^ +]?html", re.I)
+
+    XML_PREFIX = '<?xml'
+    XML_PREFIX_B = b'<?xml'
+
+    @classmethod
+    def warn_if_markup_looks_like_xml(cls, markup, stacklevel=3):
+        """Perform a check on some markup to see if it looks like XML
+        that's not XHTML. If so, issue a warning.
+
+        This is much less reliable than doing the check while parsing,
+        but some of the tree builders can't do that.
+
+        :param markup: A string or bytestring of markup, or None.
+        :param stacklevel: The stacklevel of the code calling this
+            function.
+
+        :return: True if the markup looks like non-XHTML XML, False
+            otherwise.
+        """
+        if isinstance(markup, bytes):
+            prefix = cls.XML_PREFIX_B
+            looks_like_html = cls.LOOKS_LIKE_HTML_B
+        else:
+            prefix = cls.XML_PREFIX
+            looks_like_html = cls.LOOKS_LIKE_HTML
+
+        if (markup is not None
+            and markup.startswith(prefix)
+            and not looks_like_html.search(markup[:500])
+        ):
+            cls._warn(stacklevel=stacklevel+2)
+            return True
+        return False
+
+    @classmethod
+    def _warn(cls, stacklevel=5):
+        """Issue a warning about XML being parsed as HTML."""
+        warnings.warn(
+            XMLParsedAsHTMLWarning.MESSAGE, XMLParsedAsHTMLWarning,
+            stacklevel=stacklevel
+        )
+
+    def _initialize_xml_detector(self):
+        """Call this method before parsing a document."""
+        self._first_processing_instruction = None
+        self._root_tag = None
+
+    def _document_might_be_xml(self, processing_instruction):
+        """Call this method when encountering an XML declaration, or a
+        "processing instruction" that might be an XML declaration.
+        """
+        if (self._first_processing_instruction is not None
+            or self._root_tag is not None):
+            # The document has already started; stop checking.
+            return
+
+        self._first_processing_instruction = processing_instruction
+
+        # We won't know until we encounter the first tag whether or
+        # not this is actually a problem.
+
+    def _root_tag_encountered(self, name):
+        """Call this when you encounter the document's root tag.
+
+        This is where we actually check whether an XML document is
+        being incorrectly parsed as HTML, and issue the warning.
+        """
+        if self._root_tag is not None:
+            # Called more than once by mistake; do nothing.
+            return
+
+        self._root_tag = name
+        if (name != 'html' and self._first_processing_instruction is not None
+            and self._first_processing_instruction.lower().startswith('xml ')):
+            # We encountered an XML declaration and then a tag other
+            # than 'html'. This is a reliable indicator that a
+            # non-XHTML XML document is being parsed as HTML.
+            self._warn()
+
+    
 def register_treebuilders_from(module):
     """Copy TreeBuilders from the given module into this module."""
     this_module = sys.modules[__name__]
diff --git a/lib/bs4/builder/_html5lib.py b/lib/bs4/builder/_html5lib.py
index 69aefd72..7c46a851 100644
--- a/lib/bs4/builder/_html5lib.py
+++ b/lib/bs4/builder/_html5lib.py
@@ -8,6 +8,7 @@ __all__ = [
 import warnings
 import re
 from bs4.builder import (
+    DetectsXMLParsedAsHTML,
     PERMISSIVE,
     HTML,
     HTML_5,
@@ -69,13 +70,26 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
         # ATM because the html5lib TreeBuilder doesn't use
         # UnicodeDammit.
         if exclude_encodings:
-            warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
+            warnings.warn(
+                "You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.",
+                stacklevel=3
+            )
+
+        # html5lib only parses HTML, so if it's given XML that's worth
+        # noting.
+        DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
+            markup, stacklevel=3
+        )
+
         yield (markup, None, None, False)
 
     # These methods are defined by Beautiful Soup.
     def feed(self, markup):
         if self.soup.parse_only is not None:
-            warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
+            warnings.warn(
+                "You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.",
+                stacklevel=4
+            )
         parser = html5lib.HTMLParser(tree=self.create_treebuilder)
         self.underlying_builder.parser = parser
         extra_kwargs = dict()
@@ -242,10 +256,10 @@ class AttrList(object):
     def __setitem__(self, name, value):
         # If this attribute is a multi-valued attribute for this element,
         # turn its value into a list.
-        list_attr = self.element.cdata_list_attributes
-        if (name in list_attr['*']
+        list_attr = self.element.cdata_list_attributes or {}
+        if (name in list_attr.get('*', [])
             or (self.element.name in list_attr
-                and name in list_attr[self.element.name])):
+                and name in list_attr.get(self.element.name, []))):
             # A node that is being cloned may have already undergone
             # this procedure.
             if not isinstance(value, list):
diff --git a/lib/bs4/builder/_htmlparser.py b/lib/bs4/builder/_htmlparser.py
index 70e9be84..3cc187f8 100644
--- a/lib/bs4/builder/_htmlparser.py
+++ b/lib/bs4/builder/_htmlparser.py
@@ -10,30 +10,9 @@ __all__ = [
 
 from html.parser import HTMLParser
 
-try:
-    from html.parser import HTMLParseError
-except ImportError as e:
-    # HTMLParseError is removed in Python 3.5. Since it can never be
-    # thrown in 3.5, we can just define our own class as a placeholder.
-    class HTMLParseError(Exception):
-        pass
-
 import sys
 import warnings
 
-# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
-# argument, which we'd like to set to False. Unfortunately,
-# http://bugs.python.org/issue13273 makes strict=True a better bet
-# before Python 3.2.3.
-#
-# At the end of this file, we monkeypatch HTMLParser so that
-# strict=True works well on Python 3.2.2.
-major, minor, release = sys.version_info[:3]
-CONSTRUCTOR_TAKES_STRICT = major == 3 and minor == 2 and release >= 3
-CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
-CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
-
-
 from bs4.element import (
     CData,
     Comment,
@@ -44,6 +23,8 @@ from bs4.element import (
 from bs4.dammit import EntitySubstitution, UnicodeDammit
 
 from bs4.builder import (
+    DetectsXMLParsedAsHTML,
+    ParserRejectedMarkup,
     HTML,
     HTMLTreeBuilder,
     STRICT,
@@ -52,7 +33,7 @@ from bs4.builder import (
 
 HTMLPARSER = 'html.parser'
 
-class BeautifulSoupHTMLParser(HTMLParser):
+class BeautifulSoupHTMLParser(HTMLParser, DetectsXMLParsedAsHTML):
     """A subclass of the Python standard library's HTMLParser class, which
     listens for HTMLParser events and translates them into calls
     to Beautiful Soup's tree construction API.
@@ -88,19 +69,24 @@ class BeautifulSoupHTMLParser(HTMLParser):
         # will ignore, assuming they ever show up.
         self.already_closed_empty_element = []
 
-    def error(self, msg):
-        """In Python 3, HTMLParser subclasses must implement error(), although
-        this requirement doesn't appear to be documented.
+        self._initialize_xml_detector()
 
-        In Python 2, HTMLParser implements error() by raising an exception,
-        which we don't want to do.
+    def error(self, message):
+        """Reject the markup by raising ParserRejectedMarkup(message)."""
+        # NOTE: This method is required so long as Python 3.9 is
+        # supported. The corresponding code is removed from HTMLParser
+        # in 3.5, but not removed from ParserBase until 3.10.
+        # https://github.com/python/cpython/issues/76025
+        #
+        # The original implementation turned the error into a warning, but in
+        # every case I discovered, this made HTMLParser immediately crash with
+        # an error message that was less helpful than the warning. The new
+        # implementation makes it clearer that html.parser just can't parse
+        # this markup. The 3.10 implementation does the same, though it raises
+        # AssertionError rather than calling a method. (We catch this error
+        # and wrap it in a ParserRejectedMarkup.)
+        raise ParserRejectedMarkup(message)
 
-        In any event, this method is called only on very strange
-        markup and our best strategy is to pretend it didn't happen
-        and keep going.
-        """
-        warnings.warn(msg)
-        
     def handle_startendtag(self, name, attrs):
         """Handle an incoming empty-element tag.
 
@@ -167,6 +153,9 @@ class BeautifulSoupHTMLParser(HTMLParser):
             # But we might encounter an explicit closing tag for this tag
             # later on. If so, we want to ignore it.
             self.already_closed_empty_element.append(name)
+
+        if self._root_tag is None:
+            self._root_tag_encountered(name)
             
     def handle_endtag(self, name, check_already_closed=True):
         """Handle a closing tag, e.g. '</tag>'
@@ -185,7 +174,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
             self.already_closed_empty_element.remove(name)
         else:
             self.soup.handle_endtag(name)
-
+            
     def handle_data(self, data):
         """Handle some textual data that shows up between tags."""
         self.soup.handle_data(data)
@@ -197,9 +186,10 @@ class BeautifulSoupHTMLParser(HTMLParser):
 
         :param name: Character number, possibly in hexadecimal.
         """
-        # XXX workaround for a bug in HTMLParser. Remove this once
-        # it's fixed in all supported versions.
-        # http://bugs.python.org/issue13633
+        # TODO: This was originally a workaround for a bug in
+        # HTMLParser. (http://bugs.python.org/issue13633) The bug has
+        # been fixed, but removing this code still makes some
+        # Beautiful Soup tests fail. This needs investigation.
         if name.startswith('x'):
             real_name = int(name.lstrip('x'), 16)
         elif name.startswith('X'):
@@ -288,6 +278,7 @@ class BeautifulSoupHTMLParser(HTMLParser):
         """
         self.soup.endData()
         self.soup.handle_data(data)
+        self._document_might_be_xml(data)
         self.soup.endData(ProcessingInstruction)
 
 
@@ -326,10 +317,7 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         parser_args = parser_args or []
         parser_kwargs = parser_kwargs or {}
         parser_kwargs.update(extra_parser_kwargs)
-        if CONSTRUCTOR_TAKES_STRICT and not CONSTRUCTOR_STRICT_IS_DEPRECATED:
-            parser_kwargs['strict'] = False
-        if CONSTRUCTOR_TAKES_CONVERT_CHARREFS:
-            parser_kwargs['convert_charrefs'] = False
+        parser_kwargs['convert_charrefs'] = False
         self.parser_args = (parser_args, parser_kwargs)
         
     def prepare_markup(self, markup, user_specified_encoding=None,
@@ -391,102 +379,9 @@ class HTMLParserTreeBuilder(HTMLTreeBuilder):
         try:
             parser.feed(markup)
             parser.close()
-        except HTMLParseError as e:
-            warnings.warn(RuntimeWarning(
-                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
-            raise e
+        except AssertionError as e:
+            # html.parser raises AssertionError in rare cases to
+            # indicate a fatal problem with the markup, especially
+            # when there's an error in the doctype declaration.
+            raise ParserRejectedMarkup(e)
         parser.already_closed_empty_element = []
-
-# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
-# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
-# string.
-#
-# XXX This code can be removed once most Python 3 users are on 3.2.3.
-if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
-    import re
-    attrfind_tolerant = re.compile(
-        r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
-        r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
-    HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
-
-    locatestarttagend = re.compile(r"""
-  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
-  (?:\s+                             # whitespace before attribute name
-    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
-      (?:\s*=\s*                     # value indicator
-        (?:'[^']*'                   # LITA-enclosed value
-          |\"[^\"]*\"                # LIT-enclosed value
-          |[^'\">\s]+                # bare value
-         )
-       )?
-     )
-   )*
-  \s*                                # trailing whitespace
-""", re.VERBOSE)
-    BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
-
-    from html.parser import tagfind, attrfind
-
-    def parse_starttag(self, i):
-        self.__starttag_text = None
-        endpos = self.check_for_whole_start_tag(i)
-        if endpos < 0:
-            return endpos
-        rawdata = self.rawdata
-        self.__starttag_text = rawdata[i:endpos]
-
-        # Now parse the data between i+1 and j into a tag and attrs
-        attrs = []
-        match = tagfind.match(rawdata, i+1)
-        assert match, 'unexpected call to parse_starttag()'
-        k = match.end()
-        self.lasttag = tag = rawdata[i+1:k].lower()
-        while k < endpos:
-            if self.strict:
-                m = attrfind.match(rawdata, k)
-            else:
-                m = attrfind_tolerant.match(rawdata, k)
-            if not m:
-                break
-            attrname, rest, attrvalue = m.group(1, 2, 3)
-            if not rest:
-                attrvalue = None
-            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
-                 attrvalue[:1] == '"' == attrvalue[-1:]:
-                attrvalue = attrvalue[1:-1]
-            if attrvalue:
-                attrvalue = self.unescape(attrvalue)
-            attrs.append((attrname.lower(), attrvalue))
-            k = m.end()
-
-        end = rawdata[k:endpos].strip()
-        if end not in (">", "/>"):
-            lineno, offset = self.getpos()
-            if "\n" in self.__starttag_text:
-                lineno = lineno + self.__starttag_text.count("\n")
-                offset = len(self.__starttag_text) \
-                         - self.__starttag_text.rfind("\n")
-            else:
-                offset = offset + len(self.__starttag_text)
-            if self.strict:
-                self.error("junk characters in start tag: %r"
-                           % (rawdata[k:endpos][:20],))
-            self.handle_data(rawdata[i:endpos])
-            return endpos
-        if end.endswith('/>'):
-            # XHTML-style empty tag: <span attr="value" />
-            self.handle_startendtag(tag, attrs)
-        else:
-            self.handle_starttag(tag, attrs)
-            if tag in self.CDATA_CONTENT_ELEMENTS:
-                self.set_cdata_mode(tag)
-        return endpos
-
-    def set_cdata_mode(self, elem):
-        self.cdata_elem = elem.lower()
-        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
-
-    BeautifulSoupHTMLParser.parse_starttag = parse_starttag
-    BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
-
-    CONSTRUCTOR_TAKES_STRICT = True
diff --git a/lib/bs4/builder/_lxml.py b/lib/bs4/builder/_lxml.py
index 11c9a696..4f7cf746 100644
--- a/lib/bs4/builder/_lxml.py
+++ b/lib/bs4/builder/_lxml.py
@@ -22,6 +22,7 @@ from bs4.element import (
     XMLProcessingInstruction,
 )
 from bs4.builder import (
+    DetectsXMLParsedAsHTML,
     FAST,
     HTML,
     HTMLTreeBuilder,
@@ -79,15 +80,24 @@ class LXMLTreeBuilderForXML(TreeBuilder):
 
         This might be useful later on when creating CSS selectors.
 
+        This will track (almost) all namespaces, even ones that were
+        only in scope for part of the document. If two namespaces have
+        the same prefix, only the first one encountered will be
+        tracked. Un-prefixed namespaces are not tracked.
+
         :param mapping: A dictionary mapping namespace prefixes to URIs.
         """
         for key, value in list(mapping.items()):
+            # This is 'if key' and not 'if key is not None' because we
+            # don't track un-prefixed namespaces. Soup Sieve will
+            # treat an un-prefixed namespace as the default, which
+            # causes confusion in some cases.
             if key and key not in self.soup._namespaces:
                 # Let the BeautifulSoup object know about a new namespace.
                 # If there are multiple namespaces defined with the same
                 # prefix, the first one in the document takes precedence.
                 self.soup._namespaces[key] = value
-
+                
     def default_parser(self, encoding):
         """Find the default parser for the given encoding.
 
@@ -125,6 +135,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
             self.empty_element_tags = set(empty_element_tags)
         self.soup = None
         self.nsmaps = [self.DEFAULT_NSMAPS_INVERTED]
+        self.active_namespace_prefixes = [dict(self.DEFAULT_NSMAPS)]
         super(LXMLTreeBuilderForXML, self).__init__(**kwargs)
         
     def _getNsTag(self, tag):
@@ -166,12 +177,23 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         is_html = not self.is_xml
         if is_html:
             self.processing_instruction_class = ProcessingInstruction
+            # We're in HTML mode, so if we're given XML, that's worth
+            # noting.
+            DetectsXMLParsedAsHTML.warn_if_markup_looks_like_xml(
+                markup, stacklevel=3
+            )
         else:
             self.processing_instruction_class = XMLProcessingInstruction
 
         if isinstance(markup, str):
             # We were given Unicode. Maybe lxml can parse Unicode on
             # this system?
+
+            # TODO: This is a workaround for
+            # https://bugs.launchpad.net/lxml/+bug/1948551.
+            # We can remove it once the upstream issue is fixed.
+            if len(markup) > 0 and markup[0] == u'\N{BYTE ORDER MARK}':
+                markup = markup[1:]
             yield markup, None, document_declared_encoding, False
 
         if isinstance(markup, str):
@@ -240,6 +262,20 @@ class LXMLTreeBuilderForXML(TreeBuilder):
             # mappings.
             self.nsmaps.append(_invert(nsmap))
 
+            # The currently active namespace prefixes have
+            # changed. Calculate the new mapping so it can be stored
+            # with all Tag objects created while these prefixes are in
+            # scope.
+            current_mapping = dict(self.active_namespace_prefixes[-1])
+            current_mapping.update(nsmap)
+
+            # We should not track un-prefixed namespaces as we can only hold one
+            # and it will be recognized as the default namespace by soupsieve,
+            # which may be confusing in some situations.
+            if '' in current_mapping:
+                del current_mapping['']
+            self.active_namespace_prefixes.append(current_mapping)
+            
             # Also treat the namespace mapping as a set of attributes on the
             # tag, so we can recreate it later.
             attrs = attrs.copy()
@@ -264,8 +300,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
 
         namespace, name = self._getNsTag(name)
         nsprefix = self._prefix_for_namespace(namespace)
-        self.soup.handle_starttag(name, namespace, nsprefix, attrs)
-
+        self.soup.handle_starttag(
+            name, namespace, nsprefix, attrs,
+            namespaces=self.active_namespace_prefixes[-1]
+        )
+        
     def _prefix_for_namespace(self, namespace):
         """Find the currently active prefix for the given namespace."""
         if namespace is None:
@@ -289,13 +328,20 @@ class LXMLTreeBuilderForXML(TreeBuilder):
         if len(self.nsmaps) > 1:
             # This tag, or one of its parents, introduced a namespace
             # mapping, so pop it off the stack.
-            self.nsmaps.pop()
+            out_of_scope_nsmap = self.nsmaps.pop()
 
+            if out_of_scope_nsmap is not None:
+                # This tag introduced a namespace mapping which is no
+                # longer in scope. Recalculate the currently active
+                # namespace prefixes.
+                self.active_namespace_prefixes.pop()
+            
     def pi(self, target, data):
         self.soup.endData()
-        self.soup.handle_data(target + ' ' + data)
+        data = target + ' ' + data
+        self.soup.handle_data(data)
         self.soup.endData(self.processing_instruction_class)
-
+        
     def data(self, content):
         self.soup.handle_data(content)
 
diff --git a/lib/bs4/css.py b/lib/bs4/css.py
new file mode 100644
index 00000000..245ac601
--- /dev/null
+++ b/lib/bs4/css.py
@@ -0,0 +1,280 @@
+"""Integration code for CSS selectors using Soup Sieve (pypi: soupsieve)."""
+
+import warnings
+try:
+    import soupsieve
+except ImportError as e:
+    soupsieve = None
+    warnings.warn(
+        'The soupsieve package is not installed. CSS selectors cannot be used.'
+    )
+
+
+class CSS(object):
+    """A proxy object against the soupsieve library, to simplify its
+    CSS selector API.
+
+    Acquire this object through the .css attribute on the
+    BeautifulSoup object, or on the Tag you want to use as the
+    starting point for a CSS selector.
+
+    The main advantage of doing this is that the tag to be selected
+    against doesn't need to be explicitly specified in the function
+    calls, since it's already scoped to a tag.
+    """
+
+    def __init__(self, tag, api=soupsieve):
+        """Constructor.
+
+        You don't need to instantiate this class yourself; instead,
+        access the .css attribute on the BeautifulSoup object, or on
+        the Tag you want to use as the starting point for your CSS
+        selector.
+
+        :param tag: All CSS selectors will use this as their starting
+        point.
+
+        :param api: A plug-in replacement for the soupsieve module,
+        designed mainly for use in tests.
+        """
+        if api is None:
+            raise NotImplementedError(
+                "Cannot execute CSS selectors because the soupsieve package is not installed."
+            )
+        self.api = api
+        self.tag = tag
+
+    def escape(self, ident):
+        """Escape a CSS identifier.
+
+        This is a simple wrapper around soupsieve.escape(). See the
+        documentation for that function for more information.
+        """
+        if soupsieve is None:
+            raise NotImplementedError(
+                "Cannot escape CSS identifiers because the soupsieve package is not installed."
+            )
+        return self.api.escape(ident)
+
+    def _ns(self, ns, select):
+        """Normalize a dictionary of namespaces."""
+        if not isinstance(select, self.api.SoupSieve) and ns is None:
+            # If the selector is a precompiled pattern, it already has
+            # a namespace context compiled in, which cannot be
+            # replaced.
+            ns = self.tag._namespaces
+        return ns
+
+    def _rs(self, results):
+        """Normalize a list of results to a ResultSet.
+
+        A ResultSet is more consistent with the rest of Beautiful
+        Soup's API, and ResultSet.__getattr__ has a helpful error
+        message if you try to treat a list of results as a single
+        result (a common mistake).
+        """
+        # Import here to avoid circular import
+        from bs4.element import ResultSet
+        return ResultSet(None, results)
+
+    def compile(self, select, namespaces=None, flags=0, **kwargs):
+        """Pre-compile a selector and return the compiled object.
+
+        :param select: A CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+           used in the CSS selector to namespace URIs. By default,
+           Beautiful Soup will use the prefixes it encountered while
+           parsing the document.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.compile() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+           soupsieve.compile() method.
+
+        :return: A precompiled selector object.
+        :rtype: soupsieve.SoupSieve
+        """
+        return self.api.compile(
+            select, self._ns(namespaces, select), flags, **kwargs
+        )
+
+    def select_one(self, select, namespaces=None, flags=0, **kwargs):
+        """Perform a CSS selection operation on the current Tag and return the
+        first result.
+
+        This uses the Soup Sieve library. For more information, see
+        that library's documentation for the soupsieve.select_one()
+        method.
+
+        :param select: A CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+           used in the CSS selector to namespace URIs. By default,
+           Beautiful Soup will use the prefixes it encountered while
+           parsing the document.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.select_one() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+           soupsieve.select_one() method.
+
+        :return: A Tag, or None if the selector has no match.
+        :rtype: bs4.element.Tag
+
+        """
+        return self.api.select_one(
+            select, self.tag, self._ns(namespaces, select), flags, **kwargs
+        )
+
+    def select(self, select, namespaces=None, limit=0, flags=0, **kwargs):
+        """Perform a CSS selection operation on the current Tag.
+
+        This uses the Soup Sieve library. For more information, see
+        that library's documentation for the soupsieve.select()
+        method.
+
+        :param select: A string containing a CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will pass in the prefixes it encountered while
+            parsing the document.
+
+        :param limit: After finding this number of results, stop looking.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.select() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.select() method.
+
+        :return: A ResultSet of Tag objects.
+        :rtype: bs4.element.ResultSet
+
+        """
+        if limit is None:
+            limit = 0
+
+        return self._rs(
+            self.api.select(
+                select, self.tag, self._ns(namespaces, select), limit, flags,
+                **kwargs
+            )
+        )
+
+    def iselect(self, select, namespaces=None, limit=0, flags=0, **kwargs):
+        """Perform a CSS selection operation on the current Tag.
+
+        This uses the Soup Sieve library. For more information, see
+        that library's documentation for the soupsieve.iselect()
+        method. It is the same as select(), but it returns a generator
+        instead of a list.
+
+        :param select: A string containing a CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will pass in the prefixes it encountered while
+            parsing the document.
+
+        :param limit: After finding this number of results, stop looking.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.iselect() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.iselect() method.
+
+        :return: A generator
+        :rtype: types.GeneratorType
+        """
+        return self.api.iselect(
+            select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs
+        )
+
+    def closest(self, select, namespaces=None, flags=0, **kwargs):
+        """Find the Tag closest to this one that matches the given selector.
+
+        This uses the Soup Sieve library. For more information, see
+        that library's documentation for the soupsieve.closest()
+        method.
+
+        :param select: A string containing a CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will pass in the prefixes it encountered while
+            parsing the document.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.closest() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.closest() method.
+
+        :return: A Tag, or None if there is no match.
+        :rtype: bs4.Tag
+
+        """
+        return self.api.closest(
+            select, self.tag, self._ns(namespaces, select), flags, **kwargs
+        )
+
+    def match(self, select, namespaces=None, flags=0, **kwargs):
+        """Check whether this Tag matches the given CSS selector.
+
+        This uses the Soup Sieve library. For more information, see
+        that library's documentation for the soupsieve.match()
+        method.
+
+        :param select: A CSS selector.
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will pass in the prefixes it encountered while
+            parsing the document.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.match() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.match() method.
+
+        :return: True if this Tag matches the selector; False otherwise.
+        :rtype: bool
+        """
+        return self.api.match(
+            select, self.tag, self._ns(namespaces, select), flags, **kwargs
+        )
+
+    def filter(self, select, namespaces=None, flags=0, **kwargs):
+        """Filter this Tag's direct children based on the given CSS selector.
+
+        This uses the Soup Sieve library. It works the same way as
+        passing this Tag into that library's soupsieve.filter()
+        method. For more information, see the
+        documentation for soupsieve.filter().
+
+        :param namespaces: A dictionary mapping namespace prefixes
+            used in the CSS selector to namespace URIs. By default,
+            Beautiful Soup will pass in the prefixes it encountered while
+            parsing the document.
+
+        :param flags: Flags to be passed into Soup Sieve's
+            soupsieve.filter() method.
+
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
+            soupsieve.filter() method.
+
+        :return: A ResultSet of Tag objects.
+        :rtype: bs4.element.ResultSet
+
+        """
+        return self._rs(
+            self.api.filter(
+                select, self.tag, self._ns(namespaces, select), flags, **kwargs
+            )
+        )
diff --git a/lib/bs4/dammit.py b/lib/bs4/dammit.py
index e017408b..692433c5 100644
--- a/lib/bs4/dammit.py
+++ b/lib/bs4/dammit.py
@@ -16,41 +16,37 @@ import re
 import logging
 import string
 
-# Import a library to autodetect character encodings.
-chardet_type = None
+# Import a library to autodetect character encodings. We'll support
+# any of a number of libraries that all support the same API:
+#
+# * cchardet
+# * chardet
+# * charset-normalizer
+chardet_module = None
 try:
-    # First try the fast C implementation.
     #  PyPI package: cchardet
-    import cchardet
+    import cchardet as chardet_module
+except ImportError:
+    try:
+        #  Debian package: python-chardet
+        #  PyPI package: chardet
+        import chardet as chardet_module
+    except ImportError:
+        try:
+            # PyPI package: charset-normalizer
+            import charset_normalizer as chardet_module
+        except ImportError:
+            # No chardet available.
+            chardet_module = None
+
+if chardet_module:
     def chardet_dammit(s):
         if isinstance(s, str):
             return None
-        return cchardet.detect(s)['encoding']
-except ImportError:
-    try:
-        # Fall back to the pure Python implementation
-        #  Debian package: python-chardet
-        #  PyPI package: chardet
-        import chardet
-        def chardet_dammit(s):
-            if isinstance(s, str):
-                return None
-            return chardet.detect(s)['encoding']
-        #import chardet.constants
-        #chardet.constants._debug = 1
-    except ImportError:
-        # No chardet available.
-        def chardet_dammit(s):
-            return None
-
-# Available from http://cjkpython.i18n.org/.
-#
-# TODO: This doesn't work anymore and the closest thing, iconv_codecs,
-# is GPL-licensed. Check whether this is still necessary.
-try:
-    import iconv_codec
-except ImportError:
-    pass
+        return chardet_module.detect(s)['encoding']
+else:
+    def chardet_dammit(s):
+        return None
 
 # Build bytestring and Unicode versions of regular expressions for finding
 # a declared encoding inside an XML or HTML document.
@@ -66,2246 +62,7 @@ encoding_res[str] = {
     'xml' : re.compile(xml_encoding, re.I)
 }
 
-try:
-    from html.entities import html5
-except ImportError:
-    # This is a copy of html.entities.html5 from Python 3.9. There's
-    # no equivalent table in Python 2, so we'll just provide a copy
-    # here.
-    html5 = {
-    'Aacute': '\xc1',
-    'aacute': '\xe1',
-    'Aacute;': '\xc1',
-    'aacute;': '\xe1',
-    'Abreve;': '\u0102',
-    'abreve;': '\u0103',
-    'ac;': '\u223e',
-    'acd;': '\u223f',
-    'acE;': '\u223e\u0333',
-    'Acirc': '\xc2',
-    'acirc': '\xe2',
-    'Acirc;': '\xc2',
-    'acirc;': '\xe2',
-    'acute': '\xb4',
-    'acute;': '\xb4',
-    'Acy;': '\u0410',
-    'acy;': '\u0430',
-    'AElig': '\xc6',
-    'aelig': '\xe6',
-    'AElig;': '\xc6',
-    'aelig;': '\xe6',
-    'af;': '\u2061',
-    'Afr;': '\U0001d504',
-    'afr;': '\U0001d51e',
-    'Agrave': '\xc0',
-    'agrave': '\xe0',
-    'Agrave;': '\xc0',
-    'agrave;': '\xe0',
-    'alefsym;': '\u2135',
-    'aleph;': '\u2135',
-    'Alpha;': '\u0391',
-    'alpha;': '\u03b1',
-    'Amacr;': '\u0100',
-    'amacr;': '\u0101',
-    'amalg;': '\u2a3f',
-    'AMP': '&',
-    'amp': '&',
-    'AMP;': '&',
-    'amp;': '&',
-    'And;': '\u2a53',
-    'and;': '\u2227',
-    'andand;': '\u2a55',
-    'andd;': '\u2a5c',
-    'andslope;': '\u2a58',
-    'andv;': '\u2a5a',
-    'ang;': '\u2220',
-    'ange;': '\u29a4',
-    'angle;': '\u2220',
-    'angmsd;': '\u2221',
-    'angmsdaa;': '\u29a8',
-    'angmsdab;': '\u29a9',
-    'angmsdac;': '\u29aa',
-    'angmsdad;': '\u29ab',
-    'angmsdae;': '\u29ac',
-    'angmsdaf;': '\u29ad',
-    'angmsdag;': '\u29ae',
-    'angmsdah;': '\u29af',
-    'angrt;': '\u221f',
-    'angrtvb;': '\u22be',
-    'angrtvbd;': '\u299d',
-    'angsph;': '\u2222',
-    'angst;': '\xc5',
-    'angzarr;': '\u237c',
-    'Aogon;': '\u0104',
-    'aogon;': '\u0105',
-    'Aopf;': '\U0001d538',
-    'aopf;': '\U0001d552',
-    'ap;': '\u2248',
-    'apacir;': '\u2a6f',
-    'apE;': '\u2a70',
-    'ape;': '\u224a',
-    'apid;': '\u224b',
-    'apos;': "'",
-    'ApplyFunction;': '\u2061',
-    'approx;': '\u2248',
-    'approxeq;': '\u224a',
-    'Aring': '\xc5',
-    'aring': '\xe5',
-    'Aring;': '\xc5',
-    'aring;': '\xe5',
-    'Ascr;': '\U0001d49c',
-    'ascr;': '\U0001d4b6',
-    'Assign;': '\u2254',
-    'ast;': '*',
-    'asymp;': '\u2248',
-    'asympeq;': '\u224d',
-    'Atilde': '\xc3',
-    'atilde': '\xe3',
-    'Atilde;': '\xc3',
-    'atilde;': '\xe3',
-    'Auml': '\xc4',
-    'auml': '\xe4',
-    'Auml;': '\xc4',
-    'auml;': '\xe4',
-    'awconint;': '\u2233',
-    'awint;': '\u2a11',
-    'backcong;': '\u224c',
-    'backepsilon;': '\u03f6',
-    'backprime;': '\u2035',
-    'backsim;': '\u223d',
-    'backsimeq;': '\u22cd',
-    'Backslash;': '\u2216',
-    'Barv;': '\u2ae7',
-    'barvee;': '\u22bd',
-    'Barwed;': '\u2306',
-    'barwed;': '\u2305',
-    'barwedge;': '\u2305',
-    'bbrk;': '\u23b5',
-    'bbrktbrk;': '\u23b6',
-    'bcong;': '\u224c',
-    'Bcy;': '\u0411',
-    'bcy;': '\u0431',
-    'bdquo;': '\u201e',
-    'becaus;': '\u2235',
-    'Because;': '\u2235',
-    'because;': '\u2235',
-    'bemptyv;': '\u29b0',
-    'bepsi;': '\u03f6',
-    'bernou;': '\u212c',
-    'Bernoullis;': '\u212c',
-    'Beta;': '\u0392',
-    'beta;': '\u03b2',
-    'beth;': '\u2136',
-    'between;': '\u226c',
-    'Bfr;': '\U0001d505',
-    'bfr;': '\U0001d51f',
-    'bigcap;': '\u22c2',
-    'bigcirc;': '\u25ef',
-    'bigcup;': '\u22c3',
-    'bigodot;': '\u2a00',
-    'bigoplus;': '\u2a01',
-    'bigotimes;': '\u2a02',
-    'bigsqcup;': '\u2a06',
-    'bigstar;': '\u2605',
-    'bigtriangledown;': '\u25bd',
-    'bigtriangleup;': '\u25b3',
-    'biguplus;': '\u2a04',
-    'bigvee;': '\u22c1',
-    'bigwedge;': '\u22c0',
-    'bkarow;': '\u290d',
-    'blacklozenge;': '\u29eb',
-    'blacksquare;': '\u25aa',
-    'blacktriangle;': '\u25b4',
-    'blacktriangledown;': '\u25be',
-    'blacktriangleleft;': '\u25c2',
-    'blacktriangleright;': '\u25b8',
-    'blank;': '\u2423',
-    'blk12;': '\u2592',
-    'blk14;': '\u2591',
-    'blk34;': '\u2593',
-    'block;': '\u2588',
-    'bne;': '=\u20e5',
-    'bnequiv;': '\u2261\u20e5',
-    'bNot;': '\u2aed',
-    'bnot;': '\u2310',
-    'Bopf;': '\U0001d539',
-    'bopf;': '\U0001d553',
-    'bot;': '\u22a5',
-    'bottom;': '\u22a5',
-    'bowtie;': '\u22c8',
-    'boxbox;': '\u29c9',
-    'boxDL;': '\u2557',
-    'boxDl;': '\u2556',
-    'boxdL;': '\u2555',
-    'boxdl;': '\u2510',
-    'boxDR;': '\u2554',
-    'boxDr;': '\u2553',
-    'boxdR;': '\u2552',
-    'boxdr;': '\u250c',
-    'boxH;': '\u2550',
-    'boxh;': '\u2500',
-    'boxHD;': '\u2566',
-    'boxHd;': '\u2564',
-    'boxhD;': '\u2565',
-    'boxhd;': '\u252c',
-    'boxHU;': '\u2569',
-    'boxHu;': '\u2567',
-    'boxhU;': '\u2568',
-    'boxhu;': '\u2534',
-    'boxminus;': '\u229f',
-    'boxplus;': '\u229e',
-    'boxtimes;': '\u22a0',
-    'boxUL;': '\u255d',
-    'boxUl;': '\u255c',
-    'boxuL;': '\u255b',
-    'boxul;': '\u2518',
-    'boxUR;': '\u255a',
-    'boxUr;': '\u2559',
-    'boxuR;': '\u2558',
-    'boxur;': '\u2514',
-    'boxV;': '\u2551',
-    'boxv;': '\u2502',
-    'boxVH;': '\u256c',
-    'boxVh;': '\u256b',
-    'boxvH;': '\u256a',
-    'boxvh;': '\u253c',
-    'boxVL;': '\u2563',
-    'boxVl;': '\u2562',
-    'boxvL;': '\u2561',
-    'boxvl;': '\u2524',
-    'boxVR;': '\u2560',
-    'boxVr;': '\u255f',
-    'boxvR;': '\u255e',
-    'boxvr;': '\u251c',
-    'bprime;': '\u2035',
-    'Breve;': '\u02d8',
-    'breve;': '\u02d8',
-    'brvbar': '\xa6',
-    'brvbar;': '\xa6',
-    'Bscr;': '\u212c',
-    'bscr;': '\U0001d4b7',
-    'bsemi;': '\u204f',
-    'bsim;': '\u223d',
-    'bsime;': '\u22cd',
-    'bsol;': '\\',
-    'bsolb;': '\u29c5',
-    'bsolhsub;': '\u27c8',
-    'bull;': '\u2022',
-    'bullet;': '\u2022',
-    'bump;': '\u224e',
-    'bumpE;': '\u2aae',
-    'bumpe;': '\u224f',
-    'Bumpeq;': '\u224e',
-    'bumpeq;': '\u224f',
-    'Cacute;': '\u0106',
-    'cacute;': '\u0107',
-    'Cap;': '\u22d2',
-    'cap;': '\u2229',
-    'capand;': '\u2a44',
-    'capbrcup;': '\u2a49',
-    'capcap;': '\u2a4b',
-    'capcup;': '\u2a47',
-    'capdot;': '\u2a40',
-    'CapitalDifferentialD;': '\u2145',
-    'caps;': '\u2229\ufe00',
-    'caret;': '\u2041',
-    'caron;': '\u02c7',
-    'Cayleys;': '\u212d',
-    'ccaps;': '\u2a4d',
-    'Ccaron;': '\u010c',
-    'ccaron;': '\u010d',
-    'Ccedil': '\xc7',
-    'ccedil': '\xe7',
-    'Ccedil;': '\xc7',
-    'ccedil;': '\xe7',
-    'Ccirc;': '\u0108',
-    'ccirc;': '\u0109',
-    'Cconint;': '\u2230',
-    'ccups;': '\u2a4c',
-    'ccupssm;': '\u2a50',
-    'Cdot;': '\u010a',
-    'cdot;': '\u010b',
-    'cedil': '\xb8',
-    'cedil;': '\xb8',
-    'Cedilla;': '\xb8',
-    'cemptyv;': '\u29b2',
-    'cent': '\xa2',
-    'cent;': '\xa2',
-    'CenterDot;': '\xb7',
-    'centerdot;': '\xb7',
-    'Cfr;': '\u212d',
-    'cfr;': '\U0001d520',
-    'CHcy;': '\u0427',
-    'chcy;': '\u0447',
-    'check;': '\u2713',
-    'checkmark;': '\u2713',
-    'Chi;': '\u03a7',
-    'chi;': '\u03c7',
-    'cir;': '\u25cb',
-    'circ;': '\u02c6',
-    'circeq;': '\u2257',
-    'circlearrowleft;': '\u21ba',
-    'circlearrowright;': '\u21bb',
-    'circledast;': '\u229b',
-    'circledcirc;': '\u229a',
-    'circleddash;': '\u229d',
-    'CircleDot;': '\u2299',
-    'circledR;': '\xae',
-    'circledS;': '\u24c8',
-    'CircleMinus;': '\u2296',
-    'CirclePlus;': '\u2295',
-    'CircleTimes;': '\u2297',
-    'cirE;': '\u29c3',
-    'cire;': '\u2257',
-    'cirfnint;': '\u2a10',
-    'cirmid;': '\u2aef',
-    'cirscir;': '\u29c2',
-    'ClockwiseContourIntegral;': '\u2232',
-    'CloseCurlyDoubleQuote;': '\u201d',
-    'CloseCurlyQuote;': '\u2019',
-    'clubs;': '\u2663',
-    'clubsuit;': '\u2663',
-    'Colon;': '\u2237',
-    'colon;': ':',
-    'Colone;': '\u2a74',
-    'colone;': '\u2254',
-    'coloneq;': '\u2254',
-    'comma;': ',',
-    'commat;': '@',
-    'comp;': '\u2201',
-    'compfn;': '\u2218',
-    'complement;': '\u2201',
-    'complexes;': '\u2102',
-    'cong;': '\u2245',
-    'congdot;': '\u2a6d',
-    'Congruent;': '\u2261',
-    'Conint;': '\u222f',
-    'conint;': '\u222e',
-    'ContourIntegral;': '\u222e',
-    'Copf;': '\u2102',
-    'copf;': '\U0001d554',
-    'coprod;': '\u2210',
-    'Coproduct;': '\u2210',
-    'COPY': '\xa9',
-    'copy': '\xa9',
-    'COPY;': '\xa9',
-    'copy;': '\xa9',
-    'copysr;': '\u2117',
-    'CounterClockwiseContourIntegral;': '\u2233',
-    'crarr;': '\u21b5',
-    'Cross;': '\u2a2f',
-    'cross;': '\u2717',
-    'Cscr;': '\U0001d49e',
-    'cscr;': '\U0001d4b8',
-    'csub;': '\u2acf',
-    'csube;': '\u2ad1',
-    'csup;': '\u2ad0',
-    'csupe;': '\u2ad2',
-    'ctdot;': '\u22ef',
-    'cudarrl;': '\u2938',
-    'cudarrr;': '\u2935',
-    'cuepr;': '\u22de',
-    'cuesc;': '\u22df',
-    'cularr;': '\u21b6',
-    'cularrp;': '\u293d',
-    'Cup;': '\u22d3',
-    'cup;': '\u222a',
-    'cupbrcap;': '\u2a48',
-    'CupCap;': '\u224d',
-    'cupcap;': '\u2a46',
-    'cupcup;': '\u2a4a',
-    'cupdot;': '\u228d',
-    'cupor;': '\u2a45',
-    'cups;': '\u222a\ufe00',
-    'curarr;': '\u21b7',
-    'curarrm;': '\u293c',
-    'curlyeqprec;': '\u22de',
-    'curlyeqsucc;': '\u22df',
-    'curlyvee;': '\u22ce',
-    'curlywedge;': '\u22cf',
-    'curren': '\xa4',
-    'curren;': '\xa4',
-    'curvearrowleft;': '\u21b6',
-    'curvearrowright;': '\u21b7',
-    'cuvee;': '\u22ce',
-    'cuwed;': '\u22cf',
-    'cwconint;': '\u2232',
-    'cwint;': '\u2231',
-    'cylcty;': '\u232d',
-    'Dagger;': '\u2021',
-    'dagger;': '\u2020',
-    'daleth;': '\u2138',
-    'Darr;': '\u21a1',
-    'dArr;': '\u21d3',
-    'darr;': '\u2193',
-    'dash;': '\u2010',
-    'Dashv;': '\u2ae4',
-    'dashv;': '\u22a3',
-    'dbkarow;': '\u290f',
-    'dblac;': '\u02dd',
-    'Dcaron;': '\u010e',
-    'dcaron;': '\u010f',
-    'Dcy;': '\u0414',
-    'dcy;': '\u0434',
-    'DD;': '\u2145',
-    'dd;': '\u2146',
-    'ddagger;': '\u2021',
-    'ddarr;': '\u21ca',
-    'DDotrahd;': '\u2911',
-    'ddotseq;': '\u2a77',
-    'deg': '\xb0',
-    'deg;': '\xb0',
-    'Del;': '\u2207',
-    'Delta;': '\u0394',
-    'delta;': '\u03b4',
-    'demptyv;': '\u29b1',
-    'dfisht;': '\u297f',
-    'Dfr;': '\U0001d507',
-    'dfr;': '\U0001d521',
-    'dHar;': '\u2965',
-    'dharl;': '\u21c3',
-    'dharr;': '\u21c2',
-    'DiacriticalAcute;': '\xb4',
-    'DiacriticalDot;': '\u02d9',
-    'DiacriticalDoubleAcute;': '\u02dd',
-    'DiacriticalGrave;': '`',
-    'DiacriticalTilde;': '\u02dc',
-    'diam;': '\u22c4',
-    'Diamond;': '\u22c4',
-    'diamond;': '\u22c4',
-    'diamondsuit;': '\u2666',
-    'diams;': '\u2666',
-    'die;': '\xa8',
-    'DifferentialD;': '\u2146',
-    'digamma;': '\u03dd',
-    'disin;': '\u22f2',
-    'div;': '\xf7',
-    'divide': '\xf7',
-    'divide;': '\xf7',
-    'divideontimes;': '\u22c7',
-    'divonx;': '\u22c7',
-    'DJcy;': '\u0402',
-    'djcy;': '\u0452',
-    'dlcorn;': '\u231e',
-    'dlcrop;': '\u230d',
-    'dollar;': '$',
-    'Dopf;': '\U0001d53b',
-    'dopf;': '\U0001d555',
-    'Dot;': '\xa8',
-    'dot;': '\u02d9',
-    'DotDot;': '\u20dc',
-    'doteq;': '\u2250',
-    'doteqdot;': '\u2251',
-    'DotEqual;': '\u2250',
-    'dotminus;': '\u2238',
-    'dotplus;': '\u2214',
-    'dotsquare;': '\u22a1',
-    'doublebarwedge;': '\u2306',
-    'DoubleContourIntegral;': '\u222f',
-    'DoubleDot;': '\xa8',
-    'DoubleDownArrow;': '\u21d3',
-    'DoubleLeftArrow;': '\u21d0',
-    'DoubleLeftRightArrow;': '\u21d4',
-    'DoubleLeftTee;': '\u2ae4',
-    'DoubleLongLeftArrow;': '\u27f8',
-    'DoubleLongLeftRightArrow;': '\u27fa',
-    'DoubleLongRightArrow;': '\u27f9',
-    'DoubleRightArrow;': '\u21d2',
-    'DoubleRightTee;': '\u22a8',
-    'DoubleUpArrow;': '\u21d1',
-    'DoubleUpDownArrow;': '\u21d5',
-    'DoubleVerticalBar;': '\u2225',
-    'DownArrow;': '\u2193',
-    'Downarrow;': '\u21d3',
-    'downarrow;': '\u2193',
-    'DownArrowBar;': '\u2913',
-    'DownArrowUpArrow;': '\u21f5',
-    'DownBreve;': '\u0311',
-    'downdownarrows;': '\u21ca',
-    'downharpoonleft;': '\u21c3',
-    'downharpoonright;': '\u21c2',
-    'DownLeftRightVector;': '\u2950',
-    'DownLeftTeeVector;': '\u295e',
-    'DownLeftVector;': '\u21bd',
-    'DownLeftVectorBar;': '\u2956',
-    'DownRightTeeVector;': '\u295f',
-    'DownRightVector;': '\u21c1',
-    'DownRightVectorBar;': '\u2957',
-    'DownTee;': '\u22a4',
-    'DownTeeArrow;': '\u21a7',
-    'drbkarow;': '\u2910',
-    'drcorn;': '\u231f',
-    'drcrop;': '\u230c',
-    'Dscr;': '\U0001d49f',
-    'dscr;': '\U0001d4b9',
-    'DScy;': '\u0405',
-    'dscy;': '\u0455',
-    'dsol;': '\u29f6',
-    'Dstrok;': '\u0110',
-    'dstrok;': '\u0111',
-    'dtdot;': '\u22f1',
-    'dtri;': '\u25bf',
-    'dtrif;': '\u25be',
-    'duarr;': '\u21f5',
-    'duhar;': '\u296f',
-    'dwangle;': '\u29a6',
-    'DZcy;': '\u040f',
-    'dzcy;': '\u045f',
-    'dzigrarr;': '\u27ff',
-    'Eacute': '\xc9',
-    'eacute': '\xe9',
-    'Eacute;': '\xc9',
-    'eacute;': '\xe9',
-    'easter;': '\u2a6e',
-    'Ecaron;': '\u011a',
-    'ecaron;': '\u011b',
-    'ecir;': '\u2256',
-    'Ecirc': '\xca',
-    'ecirc': '\xea',
-    'Ecirc;': '\xca',
-    'ecirc;': '\xea',
-    'ecolon;': '\u2255',
-    'Ecy;': '\u042d',
-    'ecy;': '\u044d',
-    'eDDot;': '\u2a77',
-    'Edot;': '\u0116',
-    'eDot;': '\u2251',
-    'edot;': '\u0117',
-    'ee;': '\u2147',
-    'efDot;': '\u2252',
-    'Efr;': '\U0001d508',
-    'efr;': '\U0001d522',
-    'eg;': '\u2a9a',
-    'Egrave': '\xc8',
-    'egrave': '\xe8',
-    'Egrave;': '\xc8',
-    'egrave;': '\xe8',
-    'egs;': '\u2a96',
-    'egsdot;': '\u2a98',
-    'el;': '\u2a99',
-    'Element;': '\u2208',
-    'elinters;': '\u23e7',
-    'ell;': '\u2113',
-    'els;': '\u2a95',
-    'elsdot;': '\u2a97',
-    'Emacr;': '\u0112',
-    'emacr;': '\u0113',
-    'empty;': '\u2205',
-    'emptyset;': '\u2205',
-    'EmptySmallSquare;': '\u25fb',
-    'emptyv;': '\u2205',
-    'EmptyVerySmallSquare;': '\u25ab',
-    'emsp13;': '\u2004',
-    'emsp14;': '\u2005',
-    'emsp;': '\u2003',
-    'ENG;': '\u014a',
-    'eng;': '\u014b',
-    'ensp;': '\u2002',
-    'Eogon;': '\u0118',
-    'eogon;': '\u0119',
-    'Eopf;': '\U0001d53c',
-    'eopf;': '\U0001d556',
-    'epar;': '\u22d5',
-    'eparsl;': '\u29e3',
-    'eplus;': '\u2a71',
-    'epsi;': '\u03b5',
-    'Epsilon;': '\u0395',
-    'epsilon;': '\u03b5',
-    'epsiv;': '\u03f5',
-    'eqcirc;': '\u2256',
-    'eqcolon;': '\u2255',
-    'eqsim;': '\u2242',
-    'eqslantgtr;': '\u2a96',
-    'eqslantless;': '\u2a95',
-    'Equal;': '\u2a75',
-    'equals;': '=',
-    'EqualTilde;': '\u2242',
-    'equest;': '\u225f',
-    'Equilibrium;': '\u21cc',
-    'equiv;': '\u2261',
-    'equivDD;': '\u2a78',
-    'eqvparsl;': '\u29e5',
-    'erarr;': '\u2971',
-    'erDot;': '\u2253',
-    'Escr;': '\u2130',
-    'escr;': '\u212f',
-    'esdot;': '\u2250',
-    'Esim;': '\u2a73',
-    'esim;': '\u2242',
-    'Eta;': '\u0397',
-    'eta;': '\u03b7',
-    'ETH': '\xd0',
-    'eth': '\xf0',
-    'ETH;': '\xd0',
-    'eth;': '\xf0',
-    'Euml': '\xcb',
-    'euml': '\xeb',
-    'Euml;': '\xcb',
-    'euml;': '\xeb',
-    'euro;': '\u20ac',
-    'excl;': '!',
-    'exist;': '\u2203',
-    'Exists;': '\u2203',
-    'expectation;': '\u2130',
-    'ExponentialE;': '\u2147',
-    'exponentiale;': '\u2147',
-    'fallingdotseq;': '\u2252',
-    'Fcy;': '\u0424',
-    'fcy;': '\u0444',
-    'female;': '\u2640',
-    'ffilig;': '\ufb03',
-    'fflig;': '\ufb00',
-    'ffllig;': '\ufb04',
-    'Ffr;': '\U0001d509',
-    'ffr;': '\U0001d523',
-    'filig;': '\ufb01',
-    'FilledSmallSquare;': '\u25fc',
-    'FilledVerySmallSquare;': '\u25aa',
-    'fjlig;': 'fj',
-    'flat;': '\u266d',
-    'fllig;': '\ufb02',
-    'fltns;': '\u25b1',
-    'fnof;': '\u0192',
-    'Fopf;': '\U0001d53d',
-    'fopf;': '\U0001d557',
-    'ForAll;': '\u2200',
-    'forall;': '\u2200',
-    'fork;': '\u22d4',
-    'forkv;': '\u2ad9',
-    'Fouriertrf;': '\u2131',
-    'fpartint;': '\u2a0d',
-    'frac12': '\xbd',
-    'frac12;': '\xbd',
-    'frac13;': '\u2153',
-    'frac14': '\xbc',
-    'frac14;': '\xbc',
-    'frac15;': '\u2155',
-    'frac16;': '\u2159',
-    'frac18;': '\u215b',
-    'frac23;': '\u2154',
-    'frac25;': '\u2156',
-    'frac34': '\xbe',
-    'frac34;': '\xbe',
-    'frac35;': '\u2157',
-    'frac38;': '\u215c',
-    'frac45;': '\u2158',
-    'frac56;': '\u215a',
-    'frac58;': '\u215d',
-    'frac78;': '\u215e',
-    'frasl;': '\u2044',
-    'frown;': '\u2322',
-    'Fscr;': '\u2131',
-    'fscr;': '\U0001d4bb',
-    'gacute;': '\u01f5',
-    'Gamma;': '\u0393',
-    'gamma;': '\u03b3',
-    'Gammad;': '\u03dc',
-    'gammad;': '\u03dd',
-    'gap;': '\u2a86',
-    'Gbreve;': '\u011e',
-    'gbreve;': '\u011f',
-    'Gcedil;': '\u0122',
-    'Gcirc;': '\u011c',
-    'gcirc;': '\u011d',
-    'Gcy;': '\u0413',
-    'gcy;': '\u0433',
-    'Gdot;': '\u0120',
-    'gdot;': '\u0121',
-    'gE;': '\u2267',
-    'ge;': '\u2265',
-    'gEl;': '\u2a8c',
-    'gel;': '\u22db',
-    'geq;': '\u2265',
-    'geqq;': '\u2267',
-    'geqslant;': '\u2a7e',
-    'ges;': '\u2a7e',
-    'gescc;': '\u2aa9',
-    'gesdot;': '\u2a80',
-    'gesdoto;': '\u2a82',
-    'gesdotol;': '\u2a84',
-    'gesl;': '\u22db\ufe00',
-    'gesles;': '\u2a94',
-    'Gfr;': '\U0001d50a',
-    'gfr;': '\U0001d524',
-    'Gg;': '\u22d9',
-    'gg;': '\u226b',
-    'ggg;': '\u22d9',
-    'gimel;': '\u2137',
-    'GJcy;': '\u0403',
-    'gjcy;': '\u0453',
-    'gl;': '\u2277',
-    'gla;': '\u2aa5',
-    'glE;': '\u2a92',
-    'glj;': '\u2aa4',
-    'gnap;': '\u2a8a',
-    'gnapprox;': '\u2a8a',
-    'gnE;': '\u2269',
-    'gne;': '\u2a88',
-    'gneq;': '\u2a88',
-    'gneqq;': '\u2269',
-    'gnsim;': '\u22e7',
-    'Gopf;': '\U0001d53e',
-    'gopf;': '\U0001d558',
-    'grave;': '`',
-    'GreaterEqual;': '\u2265',
-    'GreaterEqualLess;': '\u22db',
-    'GreaterFullEqual;': '\u2267',
-    'GreaterGreater;': '\u2aa2',
-    'GreaterLess;': '\u2277',
-    'GreaterSlantEqual;': '\u2a7e',
-    'GreaterTilde;': '\u2273',
-    'Gscr;': '\U0001d4a2',
-    'gscr;': '\u210a',
-    'gsim;': '\u2273',
-    'gsime;': '\u2a8e',
-    'gsiml;': '\u2a90',
-    'GT': '>',
-    'gt': '>',
-    'GT;': '>',
-    'Gt;': '\u226b',
-    'gt;': '>',
-    'gtcc;': '\u2aa7',
-    'gtcir;': '\u2a7a',
-    'gtdot;': '\u22d7',
-    'gtlPar;': '\u2995',
-    'gtquest;': '\u2a7c',
-    'gtrapprox;': '\u2a86',
-    'gtrarr;': '\u2978',
-    'gtrdot;': '\u22d7',
-    'gtreqless;': '\u22db',
-    'gtreqqless;': '\u2a8c',
-    'gtrless;': '\u2277',
-    'gtrsim;': '\u2273',
-    'gvertneqq;': '\u2269\ufe00',
-    'gvnE;': '\u2269\ufe00',
-    'Hacek;': '\u02c7',
-    'hairsp;': '\u200a',
-    'half;': '\xbd',
-    'hamilt;': '\u210b',
-    'HARDcy;': '\u042a',
-    'hardcy;': '\u044a',
-    'hArr;': '\u21d4',
-    'harr;': '\u2194',
-    'harrcir;': '\u2948',
-    'harrw;': '\u21ad',
-    'Hat;': '^',
-    'hbar;': '\u210f',
-    'Hcirc;': '\u0124',
-    'hcirc;': '\u0125',
-    'hearts;': '\u2665',
-    'heartsuit;': '\u2665',
-    'hellip;': '\u2026',
-    'hercon;': '\u22b9',
-    'Hfr;': '\u210c',
-    'hfr;': '\U0001d525',
-    'HilbertSpace;': '\u210b',
-    'hksearow;': '\u2925',
-    'hkswarow;': '\u2926',
-    'hoarr;': '\u21ff',
-    'homtht;': '\u223b',
-    'hookleftarrow;': '\u21a9',
-    'hookrightarrow;': '\u21aa',
-    'Hopf;': '\u210d',
-    'hopf;': '\U0001d559',
-    'horbar;': '\u2015',
-    'HorizontalLine;': '\u2500',
-    'Hscr;': '\u210b',
-    'hscr;': '\U0001d4bd',
-    'hslash;': '\u210f',
-    'Hstrok;': '\u0126',
-    'hstrok;': '\u0127',
-    'HumpDownHump;': '\u224e',
-    'HumpEqual;': '\u224f',
-    'hybull;': '\u2043',
-    'hyphen;': '\u2010',
-    'Iacute': '\xcd',
-    'iacute': '\xed',
-    'Iacute;': '\xcd',
-    'iacute;': '\xed',
-    'ic;': '\u2063',
-    'Icirc': '\xce',
-    'icirc': '\xee',
-    'Icirc;': '\xce',
-    'icirc;': '\xee',
-    'Icy;': '\u0418',
-    'icy;': '\u0438',
-    'Idot;': '\u0130',
-    'IEcy;': '\u0415',
-    'iecy;': '\u0435',
-    'iexcl': '\xa1',
-    'iexcl;': '\xa1',
-    'iff;': '\u21d4',
-    'Ifr;': '\u2111',
-    'ifr;': '\U0001d526',
-    'Igrave': '\xcc',
-    'igrave': '\xec',
-    'Igrave;': '\xcc',
-    'igrave;': '\xec',
-    'ii;': '\u2148',
-    'iiiint;': '\u2a0c',
-    'iiint;': '\u222d',
-    'iinfin;': '\u29dc',
-    'iiota;': '\u2129',
-    'IJlig;': '\u0132',
-    'ijlig;': '\u0133',
-    'Im;': '\u2111',
-    'Imacr;': '\u012a',
-    'imacr;': '\u012b',
-    'image;': '\u2111',
-    'ImaginaryI;': '\u2148',
-    'imagline;': '\u2110',
-    'imagpart;': '\u2111',
-    'imath;': '\u0131',
-    'imof;': '\u22b7',
-    'imped;': '\u01b5',
-    'Implies;': '\u21d2',
-    'in;': '\u2208',
-    'incare;': '\u2105',
-    'infin;': '\u221e',
-    'infintie;': '\u29dd',
-    'inodot;': '\u0131',
-    'Int;': '\u222c',
-    'int;': '\u222b',
-    'intcal;': '\u22ba',
-    'integers;': '\u2124',
-    'Integral;': '\u222b',
-    'intercal;': '\u22ba',
-    'Intersection;': '\u22c2',
-    'intlarhk;': '\u2a17',
-    'intprod;': '\u2a3c',
-    'InvisibleComma;': '\u2063',
-    'InvisibleTimes;': '\u2062',
-    'IOcy;': '\u0401',
-    'iocy;': '\u0451',
-    'Iogon;': '\u012e',
-    'iogon;': '\u012f',
-    'Iopf;': '\U0001d540',
-    'iopf;': '\U0001d55a',
-    'Iota;': '\u0399',
-    'iota;': '\u03b9',
-    'iprod;': '\u2a3c',
-    'iquest': '\xbf',
-    'iquest;': '\xbf',
-    'Iscr;': '\u2110',
-    'iscr;': '\U0001d4be',
-    'isin;': '\u2208',
-    'isindot;': '\u22f5',
-    'isinE;': '\u22f9',
-    'isins;': '\u22f4',
-    'isinsv;': '\u22f3',
-    'isinv;': '\u2208',
-    'it;': '\u2062',
-    'Itilde;': '\u0128',
-    'itilde;': '\u0129',
-    'Iukcy;': '\u0406',
-    'iukcy;': '\u0456',
-    'Iuml': '\xcf',
-    'iuml': '\xef',
-    'Iuml;': '\xcf',
-    'iuml;': '\xef',
-    'Jcirc;': '\u0134',
-    'jcirc;': '\u0135',
-    'Jcy;': '\u0419',
-    'jcy;': '\u0439',
-    'Jfr;': '\U0001d50d',
-    'jfr;': '\U0001d527',
-    'jmath;': '\u0237',
-    'Jopf;': '\U0001d541',
-    'jopf;': '\U0001d55b',
-    'Jscr;': '\U0001d4a5',
-    'jscr;': '\U0001d4bf',
-    'Jsercy;': '\u0408',
-    'jsercy;': '\u0458',
-    'Jukcy;': '\u0404',
-    'jukcy;': '\u0454',
-    'Kappa;': '\u039a',
-    'kappa;': '\u03ba',
-    'kappav;': '\u03f0',
-    'Kcedil;': '\u0136',
-    'kcedil;': '\u0137',
-    'Kcy;': '\u041a',
-    'kcy;': '\u043a',
-    'Kfr;': '\U0001d50e',
-    'kfr;': '\U0001d528',
-    'kgreen;': '\u0138',
-    'KHcy;': '\u0425',
-    'khcy;': '\u0445',
-    'KJcy;': '\u040c',
-    'kjcy;': '\u045c',
-    'Kopf;': '\U0001d542',
-    'kopf;': '\U0001d55c',
-    'Kscr;': '\U0001d4a6',
-    'kscr;': '\U0001d4c0',
-    'lAarr;': '\u21da',
-    'Lacute;': '\u0139',
-    'lacute;': '\u013a',
-    'laemptyv;': '\u29b4',
-    'lagran;': '\u2112',
-    'Lambda;': '\u039b',
-    'lambda;': '\u03bb',
-    'Lang;': '\u27ea',
-    'lang;': '\u27e8',
-    'langd;': '\u2991',
-    'langle;': '\u27e8',
-    'lap;': '\u2a85',
-    'Laplacetrf;': '\u2112',
-    'laquo': '\xab',
-    'laquo;': '\xab',
-    'Larr;': '\u219e',
-    'lArr;': '\u21d0',
-    'larr;': '\u2190',
-    'larrb;': '\u21e4',
-    'larrbfs;': '\u291f',
-    'larrfs;': '\u291d',
-    'larrhk;': '\u21a9',
-    'larrlp;': '\u21ab',
-    'larrpl;': '\u2939',
-    'larrsim;': '\u2973',
-    'larrtl;': '\u21a2',
-    'lat;': '\u2aab',
-    'lAtail;': '\u291b',
-    'latail;': '\u2919',
-    'late;': '\u2aad',
-    'lates;': '\u2aad\ufe00',
-    'lBarr;': '\u290e',
-    'lbarr;': '\u290c',
-    'lbbrk;': '\u2772',
-    'lbrace;': '{',
-    'lbrack;': '[',
-    'lbrke;': '\u298b',
-    'lbrksld;': '\u298f',
-    'lbrkslu;': '\u298d',
-    'Lcaron;': '\u013d',
-    'lcaron;': '\u013e',
-    'Lcedil;': '\u013b',
-    'lcedil;': '\u013c',
-    'lceil;': '\u2308',
-    'lcub;': '{',
-    'Lcy;': '\u041b',
-    'lcy;': '\u043b',
-    'ldca;': '\u2936',
-    'ldquo;': '\u201c',
-    'ldquor;': '\u201e',
-    'ldrdhar;': '\u2967',
-    'ldrushar;': '\u294b',
-    'ldsh;': '\u21b2',
-    'lE;': '\u2266',
-    'le;': '\u2264',
-    'LeftAngleBracket;': '\u27e8',
-    'LeftArrow;': '\u2190',
-    'Leftarrow;': '\u21d0',
-    'leftarrow;': '\u2190',
-    'LeftArrowBar;': '\u21e4',
-    'LeftArrowRightArrow;': '\u21c6',
-    'leftarrowtail;': '\u21a2',
-    'LeftCeiling;': '\u2308',
-    'LeftDoubleBracket;': '\u27e6',
-    'LeftDownTeeVector;': '\u2961',
-    'LeftDownVector;': '\u21c3',
-    'LeftDownVectorBar;': '\u2959',
-    'LeftFloor;': '\u230a',
-    'leftharpoondown;': '\u21bd',
-    'leftharpoonup;': '\u21bc',
-    'leftleftarrows;': '\u21c7',
-    'LeftRightArrow;': '\u2194',
-    'Leftrightarrow;': '\u21d4',
-    'leftrightarrow;': '\u2194',
-    'leftrightarrows;': '\u21c6',
-    'leftrightharpoons;': '\u21cb',
-    'leftrightsquigarrow;': '\u21ad',
-    'LeftRightVector;': '\u294e',
-    'LeftTee;': '\u22a3',
-    'LeftTeeArrow;': '\u21a4',
-    'LeftTeeVector;': '\u295a',
-    'leftthreetimes;': '\u22cb',
-    'LeftTriangle;': '\u22b2',
-    'LeftTriangleBar;': '\u29cf',
-    'LeftTriangleEqual;': '\u22b4',
-    'LeftUpDownVector;': '\u2951',
-    'LeftUpTeeVector;': '\u2960',
-    'LeftUpVector;': '\u21bf',
-    'LeftUpVectorBar;': '\u2958',
-    'LeftVector;': '\u21bc',
-    'LeftVectorBar;': '\u2952',
-    'lEg;': '\u2a8b',
-    'leg;': '\u22da',
-    'leq;': '\u2264',
-    'leqq;': '\u2266',
-    'leqslant;': '\u2a7d',
-    'les;': '\u2a7d',
-    'lescc;': '\u2aa8',
-    'lesdot;': '\u2a7f',
-    'lesdoto;': '\u2a81',
-    'lesdotor;': '\u2a83',
-    'lesg;': '\u22da\ufe00',
-    'lesges;': '\u2a93',
-    'lessapprox;': '\u2a85',
-    'lessdot;': '\u22d6',
-    'lesseqgtr;': '\u22da',
-    'lesseqqgtr;': '\u2a8b',
-    'LessEqualGreater;': '\u22da',
-    'LessFullEqual;': '\u2266',
-    'LessGreater;': '\u2276',
-    'lessgtr;': '\u2276',
-    'LessLess;': '\u2aa1',
-    'lesssim;': '\u2272',
-    'LessSlantEqual;': '\u2a7d',
-    'LessTilde;': '\u2272',
-    'lfisht;': '\u297c',
-    'lfloor;': '\u230a',
-    'Lfr;': '\U0001d50f',
-    'lfr;': '\U0001d529',
-    'lg;': '\u2276',
-    'lgE;': '\u2a91',
-    'lHar;': '\u2962',
-    'lhard;': '\u21bd',
-    'lharu;': '\u21bc',
-    'lharul;': '\u296a',
-    'lhblk;': '\u2584',
-    'LJcy;': '\u0409',
-    'ljcy;': '\u0459',
-    'Ll;': '\u22d8',
-    'll;': '\u226a',
-    'llarr;': '\u21c7',
-    'llcorner;': '\u231e',
-    'Lleftarrow;': '\u21da',
-    'llhard;': '\u296b',
-    'lltri;': '\u25fa',
-    'Lmidot;': '\u013f',
-    'lmidot;': '\u0140',
-    'lmoust;': '\u23b0',
-    'lmoustache;': '\u23b0',
-    'lnap;': '\u2a89',
-    'lnapprox;': '\u2a89',
-    'lnE;': '\u2268',
-    'lne;': '\u2a87',
-    'lneq;': '\u2a87',
-    'lneqq;': '\u2268',
-    'lnsim;': '\u22e6',
-    'loang;': '\u27ec',
-    'loarr;': '\u21fd',
-    'lobrk;': '\u27e6',
-    'LongLeftArrow;': '\u27f5',
-    'Longleftarrow;': '\u27f8',
-    'longleftarrow;': '\u27f5',
-    'LongLeftRightArrow;': '\u27f7',
-    'Longleftrightarrow;': '\u27fa',
-    'longleftrightarrow;': '\u27f7',
-    'longmapsto;': '\u27fc',
-    'LongRightArrow;': '\u27f6',
-    'Longrightarrow;': '\u27f9',
-    'longrightarrow;': '\u27f6',
-    'looparrowleft;': '\u21ab',
-    'looparrowright;': '\u21ac',
-    'lopar;': '\u2985',
-    'Lopf;': '\U0001d543',
-    'lopf;': '\U0001d55d',
-    'loplus;': '\u2a2d',
-    'lotimes;': '\u2a34',
-    'lowast;': '\u2217',
-    'lowbar;': '_',
-    'LowerLeftArrow;': '\u2199',
-    'LowerRightArrow;': '\u2198',
-    'loz;': '\u25ca',
-    'lozenge;': '\u25ca',
-    'lozf;': '\u29eb',
-    'lpar;': '(',
-    'lparlt;': '\u2993',
-    'lrarr;': '\u21c6',
-    'lrcorner;': '\u231f',
-    'lrhar;': '\u21cb',
-    'lrhard;': '\u296d',
-    'lrm;': '\u200e',
-    'lrtri;': '\u22bf',
-    'lsaquo;': '\u2039',
-    'Lscr;': '\u2112',
-    'lscr;': '\U0001d4c1',
-    'Lsh;': '\u21b0',
-    'lsh;': '\u21b0',
-    'lsim;': '\u2272',
-    'lsime;': '\u2a8d',
-    'lsimg;': '\u2a8f',
-    'lsqb;': '[',
-    'lsquo;': '\u2018',
-    'lsquor;': '\u201a',
-    'Lstrok;': '\u0141',
-    'lstrok;': '\u0142',
-    'LT': '<',
-    'lt': '<',
-    'LT;': '<',
-    'Lt;': '\u226a',
-    'lt;': '<',
-    'ltcc;': '\u2aa6',
-    'ltcir;': '\u2a79',
-    'ltdot;': '\u22d6',
-    'lthree;': '\u22cb',
-    'ltimes;': '\u22c9',
-    'ltlarr;': '\u2976',
-    'ltquest;': '\u2a7b',
-    'ltri;': '\u25c3',
-    'ltrie;': '\u22b4',
-    'ltrif;': '\u25c2',
-    'ltrPar;': '\u2996',
-    'lurdshar;': '\u294a',
-    'luruhar;': '\u2966',
-    'lvertneqq;': '\u2268\ufe00',
-    'lvnE;': '\u2268\ufe00',
-    'macr': '\xaf',
-    'macr;': '\xaf',
-    'male;': '\u2642',
-    'malt;': '\u2720',
-    'maltese;': '\u2720',
-    'Map;': '\u2905',
-    'map;': '\u21a6',
-    'mapsto;': '\u21a6',
-    'mapstodown;': '\u21a7',
-    'mapstoleft;': '\u21a4',
-    'mapstoup;': '\u21a5',
-    'marker;': '\u25ae',
-    'mcomma;': '\u2a29',
-    'Mcy;': '\u041c',
-    'mcy;': '\u043c',
-    'mdash;': '\u2014',
-    'mDDot;': '\u223a',
-    'measuredangle;': '\u2221',
-    'MediumSpace;': '\u205f',
-    'Mellintrf;': '\u2133',
-    'Mfr;': '\U0001d510',
-    'mfr;': '\U0001d52a',
-    'mho;': '\u2127',
-    'micro': '\xb5',
-    'micro;': '\xb5',
-    'mid;': '\u2223',
-    'midast;': '*',
-    'midcir;': '\u2af0',
-    'middot': '\xb7',
-    'middot;': '\xb7',
-    'minus;': '\u2212',
-    'minusb;': '\u229f',
-    'minusd;': '\u2238',
-    'minusdu;': '\u2a2a',
-    'MinusPlus;': '\u2213',
-    'mlcp;': '\u2adb',
-    'mldr;': '\u2026',
-    'mnplus;': '\u2213',
-    'models;': '\u22a7',
-    'Mopf;': '\U0001d544',
-    'mopf;': '\U0001d55e',
-    'mp;': '\u2213',
-    'Mscr;': '\u2133',
-    'mscr;': '\U0001d4c2',
-    'mstpos;': '\u223e',
-    'Mu;': '\u039c',
-    'mu;': '\u03bc',
-    'multimap;': '\u22b8',
-    'mumap;': '\u22b8',
-    'nabla;': '\u2207',
-    'Nacute;': '\u0143',
-    'nacute;': '\u0144',
-    'nang;': '\u2220\u20d2',
-    'nap;': '\u2249',
-    'napE;': '\u2a70\u0338',
-    'napid;': '\u224b\u0338',
-    'napos;': '\u0149',
-    'napprox;': '\u2249',
-    'natur;': '\u266e',
-    'natural;': '\u266e',
-    'naturals;': '\u2115',
-    'nbsp': '\xa0',
-    'nbsp;': '\xa0',
-    'nbump;': '\u224e\u0338',
-    'nbumpe;': '\u224f\u0338',
-    'ncap;': '\u2a43',
-    'Ncaron;': '\u0147',
-    'ncaron;': '\u0148',
-    'Ncedil;': '\u0145',
-    'ncedil;': '\u0146',
-    'ncong;': '\u2247',
-    'ncongdot;': '\u2a6d\u0338',
-    'ncup;': '\u2a42',
-    'Ncy;': '\u041d',
-    'ncy;': '\u043d',
-    'ndash;': '\u2013',
-    'ne;': '\u2260',
-    'nearhk;': '\u2924',
-    'neArr;': '\u21d7',
-    'nearr;': '\u2197',
-    'nearrow;': '\u2197',
-    'nedot;': '\u2250\u0338',
-    'NegativeMediumSpace;': '\u200b',
-    'NegativeThickSpace;': '\u200b',
-    'NegativeThinSpace;': '\u200b',
-    'NegativeVeryThinSpace;': '\u200b',
-    'nequiv;': '\u2262',
-    'nesear;': '\u2928',
-    'nesim;': '\u2242\u0338',
-    'NestedGreaterGreater;': '\u226b',
-    'NestedLessLess;': '\u226a',
-    'NewLine;': '\n',
-    'nexist;': '\u2204',
-    'nexists;': '\u2204',
-    'Nfr;': '\U0001d511',
-    'nfr;': '\U0001d52b',
-    'ngE;': '\u2267\u0338',
-    'nge;': '\u2271',
-    'ngeq;': '\u2271',
-    'ngeqq;': '\u2267\u0338',
-    'ngeqslant;': '\u2a7e\u0338',
-    'nges;': '\u2a7e\u0338',
-    'nGg;': '\u22d9\u0338',
-    'ngsim;': '\u2275',
-    'nGt;': '\u226b\u20d2',
-    'ngt;': '\u226f',
-    'ngtr;': '\u226f',
-    'nGtv;': '\u226b\u0338',
-    'nhArr;': '\u21ce',
-    'nharr;': '\u21ae',
-    'nhpar;': '\u2af2',
-    'ni;': '\u220b',
-    'nis;': '\u22fc',
-    'nisd;': '\u22fa',
-    'niv;': '\u220b',
-    'NJcy;': '\u040a',
-    'njcy;': '\u045a',
-    'nlArr;': '\u21cd',
-    'nlarr;': '\u219a',
-    'nldr;': '\u2025',
-    'nlE;': '\u2266\u0338',
-    'nle;': '\u2270',
-    'nLeftarrow;': '\u21cd',
-    'nleftarrow;': '\u219a',
-    'nLeftrightarrow;': '\u21ce',
-    'nleftrightarrow;': '\u21ae',
-    'nleq;': '\u2270',
-    'nleqq;': '\u2266\u0338',
-    'nleqslant;': '\u2a7d\u0338',
-    'nles;': '\u2a7d\u0338',
-    'nless;': '\u226e',
-    'nLl;': '\u22d8\u0338',
-    'nlsim;': '\u2274',
-    'nLt;': '\u226a\u20d2',
-    'nlt;': '\u226e',
-    'nltri;': '\u22ea',
-    'nltrie;': '\u22ec',
-    'nLtv;': '\u226a\u0338',
-    'nmid;': '\u2224',
-    'NoBreak;': '\u2060',
-    'NonBreakingSpace;': '\xa0',
-    'Nopf;': '\u2115',
-    'nopf;': '\U0001d55f',
-    'not': '\xac',
-    'Not;': '\u2aec',
-    'not;': '\xac',
-    'NotCongruent;': '\u2262',
-    'NotCupCap;': '\u226d',
-    'NotDoubleVerticalBar;': '\u2226',
-    'NotElement;': '\u2209',
-    'NotEqual;': '\u2260',
-    'NotEqualTilde;': '\u2242\u0338',
-    'NotExists;': '\u2204',
-    'NotGreater;': '\u226f',
-    'NotGreaterEqual;': '\u2271',
-    'NotGreaterFullEqual;': '\u2267\u0338',
-    'NotGreaterGreater;': '\u226b\u0338',
-    'NotGreaterLess;': '\u2279',
-    'NotGreaterSlantEqual;': '\u2a7e\u0338',
-    'NotGreaterTilde;': '\u2275',
-    'NotHumpDownHump;': '\u224e\u0338',
-    'NotHumpEqual;': '\u224f\u0338',
-    'notin;': '\u2209',
-    'notindot;': '\u22f5\u0338',
-    'notinE;': '\u22f9\u0338',
-    'notinva;': '\u2209',
-    'notinvb;': '\u22f7',
-    'notinvc;': '\u22f6',
-    'NotLeftTriangle;': '\u22ea',
-    'NotLeftTriangleBar;': '\u29cf\u0338',
-    'NotLeftTriangleEqual;': '\u22ec',
-    'NotLess;': '\u226e',
-    'NotLessEqual;': '\u2270',
-    'NotLessGreater;': '\u2278',
-    'NotLessLess;': '\u226a\u0338',
-    'NotLessSlantEqual;': '\u2a7d\u0338',
-    'NotLessTilde;': '\u2274',
-    'NotNestedGreaterGreater;': '\u2aa2\u0338',
-    'NotNestedLessLess;': '\u2aa1\u0338',
-    'notni;': '\u220c',
-    'notniva;': '\u220c',
-    'notnivb;': '\u22fe',
-    'notnivc;': '\u22fd',
-    'NotPrecedes;': '\u2280',
-    'NotPrecedesEqual;': '\u2aaf\u0338',
-    'NotPrecedesSlantEqual;': '\u22e0',
-    'NotReverseElement;': '\u220c',
-    'NotRightTriangle;': '\u22eb',
-    'NotRightTriangleBar;': '\u29d0\u0338',
-    'NotRightTriangleEqual;': '\u22ed',
-    'NotSquareSubset;': '\u228f\u0338',
-    'NotSquareSubsetEqual;': '\u22e2',
-    'NotSquareSuperset;': '\u2290\u0338',
-    'NotSquareSupersetEqual;': '\u22e3',
-    'NotSubset;': '\u2282\u20d2',
-    'NotSubsetEqual;': '\u2288',
-    'NotSucceeds;': '\u2281',
-    'NotSucceedsEqual;': '\u2ab0\u0338',
-    'NotSucceedsSlantEqual;': '\u22e1',
-    'NotSucceedsTilde;': '\u227f\u0338',
-    'NotSuperset;': '\u2283\u20d2',
-    'NotSupersetEqual;': '\u2289',
-    'NotTilde;': '\u2241',
-    'NotTildeEqual;': '\u2244',
-    'NotTildeFullEqual;': '\u2247',
-    'NotTildeTilde;': '\u2249',
-    'NotVerticalBar;': '\u2224',
-    'npar;': '\u2226',
-    'nparallel;': '\u2226',
-    'nparsl;': '\u2afd\u20e5',
-    'npart;': '\u2202\u0338',
-    'npolint;': '\u2a14',
-    'npr;': '\u2280',
-    'nprcue;': '\u22e0',
-    'npre;': '\u2aaf\u0338',
-    'nprec;': '\u2280',
-    'npreceq;': '\u2aaf\u0338',
-    'nrArr;': '\u21cf',
-    'nrarr;': '\u219b',
-    'nrarrc;': '\u2933\u0338',
-    'nrarrw;': '\u219d\u0338',
-    'nRightarrow;': '\u21cf',
-    'nrightarrow;': '\u219b',
-    'nrtri;': '\u22eb',
-    'nrtrie;': '\u22ed',
-    'nsc;': '\u2281',
-    'nsccue;': '\u22e1',
-    'nsce;': '\u2ab0\u0338',
-    'Nscr;': '\U0001d4a9',
-    'nscr;': '\U0001d4c3',
-    'nshortmid;': '\u2224',
-    'nshortparallel;': '\u2226',
-    'nsim;': '\u2241',
-    'nsime;': '\u2244',
-    'nsimeq;': '\u2244',
-    'nsmid;': '\u2224',
-    'nspar;': '\u2226',
-    'nsqsube;': '\u22e2',
-    'nsqsupe;': '\u22e3',
-    'nsub;': '\u2284',
-    'nsubE;': '\u2ac5\u0338',
-    'nsube;': '\u2288',
-    'nsubset;': '\u2282\u20d2',
-    'nsubseteq;': '\u2288',
-    'nsubseteqq;': '\u2ac5\u0338',
-    'nsucc;': '\u2281',
-    'nsucceq;': '\u2ab0\u0338',
-    'nsup;': '\u2285',
-    'nsupE;': '\u2ac6\u0338',
-    'nsupe;': '\u2289',
-    'nsupset;': '\u2283\u20d2',
-    'nsupseteq;': '\u2289',
-    'nsupseteqq;': '\u2ac6\u0338',
-    'ntgl;': '\u2279',
-    'Ntilde': '\xd1',
-    'ntilde': '\xf1',
-    'Ntilde;': '\xd1',
-    'ntilde;': '\xf1',
-    'ntlg;': '\u2278',
-    'ntriangleleft;': '\u22ea',
-    'ntrianglelefteq;': '\u22ec',
-    'ntriangleright;': '\u22eb',
-    'ntrianglerighteq;': '\u22ed',
-    'Nu;': '\u039d',
-    'nu;': '\u03bd',
-    'num;': '#',
-    'numero;': '\u2116',
-    'numsp;': '\u2007',
-    'nvap;': '\u224d\u20d2',
-    'nVDash;': '\u22af',
-    'nVdash;': '\u22ae',
-    'nvDash;': '\u22ad',
-    'nvdash;': '\u22ac',
-    'nvge;': '\u2265\u20d2',
-    'nvgt;': '>\u20d2',
-    'nvHarr;': '\u2904',
-    'nvinfin;': '\u29de',
-    'nvlArr;': '\u2902',
-    'nvle;': '\u2264\u20d2',
-    'nvlt;': '<\u20d2',
-    'nvltrie;': '\u22b4\u20d2',
-    'nvrArr;': '\u2903',
-    'nvrtrie;': '\u22b5\u20d2',
-    'nvsim;': '\u223c\u20d2',
-    'nwarhk;': '\u2923',
-    'nwArr;': '\u21d6',
-    'nwarr;': '\u2196',
-    'nwarrow;': '\u2196',
-    'nwnear;': '\u2927',
-    'Oacute': '\xd3',
-    'oacute': '\xf3',
-    'Oacute;': '\xd3',
-    'oacute;': '\xf3',
-    'oast;': '\u229b',
-    'ocir;': '\u229a',
-    'Ocirc': '\xd4',
-    'ocirc': '\xf4',
-    'Ocirc;': '\xd4',
-    'ocirc;': '\xf4',
-    'Ocy;': '\u041e',
-    'ocy;': '\u043e',
-    'odash;': '\u229d',
-    'Odblac;': '\u0150',
-    'odblac;': '\u0151',
-    'odiv;': '\u2a38',
-    'odot;': '\u2299',
-    'odsold;': '\u29bc',
-    'OElig;': '\u0152',
-    'oelig;': '\u0153',
-    'ofcir;': '\u29bf',
-    'Ofr;': '\U0001d512',
-    'ofr;': '\U0001d52c',
-    'ogon;': '\u02db',
-    'Ograve': '\xd2',
-    'ograve': '\xf2',
-    'Ograve;': '\xd2',
-    'ograve;': '\xf2',
-    'ogt;': '\u29c1',
-    'ohbar;': '\u29b5',
-    'ohm;': '\u03a9',
-    'oint;': '\u222e',
-    'olarr;': '\u21ba',
-    'olcir;': '\u29be',
-    'olcross;': '\u29bb',
-    'oline;': '\u203e',
-    'olt;': '\u29c0',
-    'Omacr;': '\u014c',
-    'omacr;': '\u014d',
-    'Omega;': '\u03a9',
-    'omega;': '\u03c9',
-    'Omicron;': '\u039f',
-    'omicron;': '\u03bf',
-    'omid;': '\u29b6',
-    'ominus;': '\u2296',
-    'Oopf;': '\U0001d546',
-    'oopf;': '\U0001d560',
-    'opar;': '\u29b7',
-    'OpenCurlyDoubleQuote;': '\u201c',
-    'OpenCurlyQuote;': '\u2018',
-    'operp;': '\u29b9',
-    'oplus;': '\u2295',
-    'Or;': '\u2a54',
-    'or;': '\u2228',
-    'orarr;': '\u21bb',
-    'ord;': '\u2a5d',
-    'order;': '\u2134',
-    'orderof;': '\u2134',
-    'ordf': '\xaa',
-    'ordf;': '\xaa',
-    'ordm': '\xba',
-    'ordm;': '\xba',
-    'origof;': '\u22b6',
-    'oror;': '\u2a56',
-    'orslope;': '\u2a57',
-    'orv;': '\u2a5b',
-    'oS;': '\u24c8',
-    'Oscr;': '\U0001d4aa',
-    'oscr;': '\u2134',
-    'Oslash': '\xd8',
-    'oslash': '\xf8',
-    'Oslash;': '\xd8',
-    'oslash;': '\xf8',
-    'osol;': '\u2298',
-    'Otilde': '\xd5',
-    'otilde': '\xf5',
-    'Otilde;': '\xd5',
-    'otilde;': '\xf5',
-    'Otimes;': '\u2a37',
-    'otimes;': '\u2297',
-    'otimesas;': '\u2a36',
-    'Ouml': '\xd6',
-    'ouml': '\xf6',
-    'Ouml;': '\xd6',
-    'ouml;': '\xf6',
-    'ovbar;': '\u233d',
-    'OverBar;': '\u203e',
-    'OverBrace;': '\u23de',
-    'OverBracket;': '\u23b4',
-    'OverParenthesis;': '\u23dc',
-    'par;': '\u2225',
-    'para': '\xb6',
-    'para;': '\xb6',
-    'parallel;': '\u2225',
-    'parsim;': '\u2af3',
-    'parsl;': '\u2afd',
-    'part;': '\u2202',
-    'PartialD;': '\u2202',
-    'Pcy;': '\u041f',
-    'pcy;': '\u043f',
-    'percnt;': '%',
-    'period;': '.',
-    'permil;': '\u2030',
-    'perp;': '\u22a5',
-    'pertenk;': '\u2031',
-    'Pfr;': '\U0001d513',
-    'pfr;': '\U0001d52d',
-    'Phi;': '\u03a6',
-    'phi;': '\u03c6',
-    'phiv;': '\u03d5',
-    'phmmat;': '\u2133',
-    'phone;': '\u260e',
-    'Pi;': '\u03a0',
-    'pi;': '\u03c0',
-    'pitchfork;': '\u22d4',
-    'piv;': '\u03d6',
-    'planck;': '\u210f',
-    'planckh;': '\u210e',
-    'plankv;': '\u210f',
-    'plus;': '+',
-    'plusacir;': '\u2a23',
-    'plusb;': '\u229e',
-    'pluscir;': '\u2a22',
-    'plusdo;': '\u2214',
-    'plusdu;': '\u2a25',
-    'pluse;': '\u2a72',
-    'PlusMinus;': '\xb1',
-    'plusmn': '\xb1',
-    'plusmn;': '\xb1',
-    'plussim;': '\u2a26',
-    'plustwo;': '\u2a27',
-    'pm;': '\xb1',
-    'Poincareplane;': '\u210c',
-    'pointint;': '\u2a15',
-    'Popf;': '\u2119',
-    'popf;': '\U0001d561',
-    'pound': '\xa3',
-    'pound;': '\xa3',
-    'Pr;': '\u2abb',
-    'pr;': '\u227a',
-    'prap;': '\u2ab7',
-    'prcue;': '\u227c',
-    'prE;': '\u2ab3',
-    'pre;': '\u2aaf',
-    'prec;': '\u227a',
-    'precapprox;': '\u2ab7',
-    'preccurlyeq;': '\u227c',
-    'Precedes;': '\u227a',
-    'PrecedesEqual;': '\u2aaf',
-    'PrecedesSlantEqual;': '\u227c',
-    'PrecedesTilde;': '\u227e',
-    'preceq;': '\u2aaf',
-    'precnapprox;': '\u2ab9',
-    'precneqq;': '\u2ab5',
-    'precnsim;': '\u22e8',
-    'precsim;': '\u227e',
-    'Prime;': '\u2033',
-    'prime;': '\u2032',
-    'primes;': '\u2119',
-    'prnap;': '\u2ab9',
-    'prnE;': '\u2ab5',
-    'prnsim;': '\u22e8',
-    'prod;': '\u220f',
-    'Product;': '\u220f',
-    'profalar;': '\u232e',
-    'profline;': '\u2312',
-    'profsurf;': '\u2313',
-    'prop;': '\u221d',
-    'Proportion;': '\u2237',
-    'Proportional;': '\u221d',
-    'propto;': '\u221d',
-    'prsim;': '\u227e',
-    'prurel;': '\u22b0',
-    'Pscr;': '\U0001d4ab',
-    'pscr;': '\U0001d4c5',
-    'Psi;': '\u03a8',
-    'psi;': '\u03c8',
-    'puncsp;': '\u2008',
-    'Qfr;': '\U0001d514',
-    'qfr;': '\U0001d52e',
-    'qint;': '\u2a0c',
-    'Qopf;': '\u211a',
-    'qopf;': '\U0001d562',
-    'qprime;': '\u2057',
-    'Qscr;': '\U0001d4ac',
-    'qscr;': '\U0001d4c6',
-    'quaternions;': '\u210d',
-    'quatint;': '\u2a16',
-    'quest;': '?',
-    'questeq;': '\u225f',
-    'QUOT': '"',
-    'quot': '"',
-    'QUOT;': '"',
-    'quot;': '"',
-    'rAarr;': '\u21db',
-    'race;': '\u223d\u0331',
-    'Racute;': '\u0154',
-    'racute;': '\u0155',
-    'radic;': '\u221a',
-    'raemptyv;': '\u29b3',
-    'Rang;': '\u27eb',
-    'rang;': '\u27e9',
-    'rangd;': '\u2992',
-    'range;': '\u29a5',
-    'rangle;': '\u27e9',
-    'raquo': '\xbb',
-    'raquo;': '\xbb',
-    'Rarr;': '\u21a0',
-    'rArr;': '\u21d2',
-    'rarr;': '\u2192',
-    'rarrap;': '\u2975',
-    'rarrb;': '\u21e5',
-    'rarrbfs;': '\u2920',
-    'rarrc;': '\u2933',
-    'rarrfs;': '\u291e',
-    'rarrhk;': '\u21aa',
-    'rarrlp;': '\u21ac',
-    'rarrpl;': '\u2945',
-    'rarrsim;': '\u2974',
-    'Rarrtl;': '\u2916',
-    'rarrtl;': '\u21a3',
-    'rarrw;': '\u219d',
-    'rAtail;': '\u291c',
-    'ratail;': '\u291a',
-    'ratio;': '\u2236',
-    'rationals;': '\u211a',
-    'RBarr;': '\u2910',
-    'rBarr;': '\u290f',
-    'rbarr;': '\u290d',
-    'rbbrk;': '\u2773',
-    'rbrace;': '}',
-    'rbrack;': ']',
-    'rbrke;': '\u298c',
-    'rbrksld;': '\u298e',
-    'rbrkslu;': '\u2990',
-    'Rcaron;': '\u0158',
-    'rcaron;': '\u0159',
-    'Rcedil;': '\u0156',
-    'rcedil;': '\u0157',
-    'rceil;': '\u2309',
-    'rcub;': '}',
-    'Rcy;': '\u0420',
-    'rcy;': '\u0440',
-    'rdca;': '\u2937',
-    'rdldhar;': '\u2969',
-    'rdquo;': '\u201d',
-    'rdquor;': '\u201d',
-    'rdsh;': '\u21b3',
-    'Re;': '\u211c',
-    'real;': '\u211c',
-    'realine;': '\u211b',
-    'realpart;': '\u211c',
-    'reals;': '\u211d',
-    'rect;': '\u25ad',
-    'REG': '\xae',
-    'reg': '\xae',
-    'REG;': '\xae',
-    'reg;': '\xae',
-    'ReverseElement;': '\u220b',
-    'ReverseEquilibrium;': '\u21cb',
-    'ReverseUpEquilibrium;': '\u296f',
-    'rfisht;': '\u297d',
-    'rfloor;': '\u230b',
-    'Rfr;': '\u211c',
-    'rfr;': '\U0001d52f',
-    'rHar;': '\u2964',
-    'rhard;': '\u21c1',
-    'rharu;': '\u21c0',
-    'rharul;': '\u296c',
-    'Rho;': '\u03a1',
-    'rho;': '\u03c1',
-    'rhov;': '\u03f1',
-    'RightAngleBracket;': '\u27e9',
-    'RightArrow;': '\u2192',
-    'Rightarrow;': '\u21d2',
-    'rightarrow;': '\u2192',
-    'RightArrowBar;': '\u21e5',
-    'RightArrowLeftArrow;': '\u21c4',
-    'rightarrowtail;': '\u21a3',
-    'RightCeiling;': '\u2309',
-    'RightDoubleBracket;': '\u27e7',
-    'RightDownTeeVector;': '\u295d',
-    'RightDownVector;': '\u21c2',
-    'RightDownVectorBar;': '\u2955',
-    'RightFloor;': '\u230b',
-    'rightharpoondown;': '\u21c1',
-    'rightharpoonup;': '\u21c0',
-    'rightleftarrows;': '\u21c4',
-    'rightleftharpoons;': '\u21cc',
-    'rightrightarrows;': '\u21c9',
-    'rightsquigarrow;': '\u219d',
-    'RightTee;': '\u22a2',
-    'RightTeeArrow;': '\u21a6',
-    'RightTeeVector;': '\u295b',
-    'rightthreetimes;': '\u22cc',
-    'RightTriangle;': '\u22b3',
-    'RightTriangleBar;': '\u29d0',
-    'RightTriangleEqual;': '\u22b5',
-    'RightUpDownVector;': '\u294f',
-    'RightUpTeeVector;': '\u295c',
-    'RightUpVector;': '\u21be',
-    'RightUpVectorBar;': '\u2954',
-    'RightVector;': '\u21c0',
-    'RightVectorBar;': '\u2953',
-    'ring;': '\u02da',
-    'risingdotseq;': '\u2253',
-    'rlarr;': '\u21c4',
-    'rlhar;': '\u21cc',
-    'rlm;': '\u200f',
-    'rmoust;': '\u23b1',
-    'rmoustache;': '\u23b1',
-    'rnmid;': '\u2aee',
-    'roang;': '\u27ed',
-    'roarr;': '\u21fe',
-    'robrk;': '\u27e7',
-    'ropar;': '\u2986',
-    'Ropf;': '\u211d',
-    'ropf;': '\U0001d563',
-    'roplus;': '\u2a2e',
-    'rotimes;': '\u2a35',
-    'RoundImplies;': '\u2970',
-    'rpar;': ')',
-    'rpargt;': '\u2994',
-    'rppolint;': '\u2a12',
-    'rrarr;': '\u21c9',
-    'Rrightarrow;': '\u21db',
-    'rsaquo;': '\u203a',
-    'Rscr;': '\u211b',
-    'rscr;': '\U0001d4c7',
-    'Rsh;': '\u21b1',
-    'rsh;': '\u21b1',
-    'rsqb;': ']',
-    'rsquo;': '\u2019',
-    'rsquor;': '\u2019',
-    'rthree;': '\u22cc',
-    'rtimes;': '\u22ca',
-    'rtri;': '\u25b9',
-    'rtrie;': '\u22b5',
-    'rtrif;': '\u25b8',
-    'rtriltri;': '\u29ce',
-    'RuleDelayed;': '\u29f4',
-    'ruluhar;': '\u2968',
-    'rx;': '\u211e',
-    'Sacute;': '\u015a',
-    'sacute;': '\u015b',
-    'sbquo;': '\u201a',
-    'Sc;': '\u2abc',
-    'sc;': '\u227b',
-    'scap;': '\u2ab8',
-    'Scaron;': '\u0160',
-    'scaron;': '\u0161',
-    'sccue;': '\u227d',
-    'scE;': '\u2ab4',
-    'sce;': '\u2ab0',
-    'Scedil;': '\u015e',
-    'scedil;': '\u015f',
-    'Scirc;': '\u015c',
-    'scirc;': '\u015d',
-    'scnap;': '\u2aba',
-    'scnE;': '\u2ab6',
-    'scnsim;': '\u22e9',
-    'scpolint;': '\u2a13',
-    'scsim;': '\u227f',
-    'Scy;': '\u0421',
-    'scy;': '\u0441',
-    'sdot;': '\u22c5',
-    'sdotb;': '\u22a1',
-    'sdote;': '\u2a66',
-    'searhk;': '\u2925',
-    'seArr;': '\u21d8',
-    'searr;': '\u2198',
-    'searrow;': '\u2198',
-    'sect': '\xa7',
-    'sect;': '\xa7',
-    'semi;': ';',
-    'seswar;': '\u2929',
-    'setminus;': '\u2216',
-    'setmn;': '\u2216',
-    'sext;': '\u2736',
-    'Sfr;': '\U0001d516',
-    'sfr;': '\U0001d530',
-    'sfrown;': '\u2322',
-    'sharp;': '\u266f',
-    'SHCHcy;': '\u0429',
-    'shchcy;': '\u0449',
-    'SHcy;': '\u0428',
-    'shcy;': '\u0448',
-    'ShortDownArrow;': '\u2193',
-    'ShortLeftArrow;': '\u2190',
-    'shortmid;': '\u2223',
-    'shortparallel;': '\u2225',
-    'ShortRightArrow;': '\u2192',
-    'ShortUpArrow;': '\u2191',
-    'shy': '\xad',
-    'shy;': '\xad',
-    'Sigma;': '\u03a3',
-    'sigma;': '\u03c3',
-    'sigmaf;': '\u03c2',
-    'sigmav;': '\u03c2',
-    'sim;': '\u223c',
-    'simdot;': '\u2a6a',
-    'sime;': '\u2243',
-    'simeq;': '\u2243',
-    'simg;': '\u2a9e',
-    'simgE;': '\u2aa0',
-    'siml;': '\u2a9d',
-    'simlE;': '\u2a9f',
-    'simne;': '\u2246',
-    'simplus;': '\u2a24',
-    'simrarr;': '\u2972',
-    'slarr;': '\u2190',
-    'SmallCircle;': '\u2218',
-    'smallsetminus;': '\u2216',
-    'smashp;': '\u2a33',
-    'smeparsl;': '\u29e4',
-    'smid;': '\u2223',
-    'smile;': '\u2323',
-    'smt;': '\u2aaa',
-    'smte;': '\u2aac',
-    'smtes;': '\u2aac\ufe00',
-    'SOFTcy;': '\u042c',
-    'softcy;': '\u044c',
-    'sol;': '/',
-    'solb;': '\u29c4',
-    'solbar;': '\u233f',
-    'Sopf;': '\U0001d54a',
-    'sopf;': '\U0001d564',
-    'spades;': '\u2660',
-    'spadesuit;': '\u2660',
-    'spar;': '\u2225',
-    'sqcap;': '\u2293',
-    'sqcaps;': '\u2293\ufe00',
-    'sqcup;': '\u2294',
-    'sqcups;': '\u2294\ufe00',
-    'Sqrt;': '\u221a',
-    'sqsub;': '\u228f',
-    'sqsube;': '\u2291',
-    'sqsubset;': '\u228f',
-    'sqsubseteq;': '\u2291',
-    'sqsup;': '\u2290',
-    'sqsupe;': '\u2292',
-    'sqsupset;': '\u2290',
-    'sqsupseteq;': '\u2292',
-    'squ;': '\u25a1',
-    'Square;': '\u25a1',
-    'square;': '\u25a1',
-    'SquareIntersection;': '\u2293',
-    'SquareSubset;': '\u228f',
-    'SquareSubsetEqual;': '\u2291',
-    'SquareSuperset;': '\u2290',
-    'SquareSupersetEqual;': '\u2292',
-    'SquareUnion;': '\u2294',
-    'squarf;': '\u25aa',
-    'squf;': '\u25aa',
-    'srarr;': '\u2192',
-    'Sscr;': '\U0001d4ae',
-    'sscr;': '\U0001d4c8',
-    'ssetmn;': '\u2216',
-    'ssmile;': '\u2323',
-    'sstarf;': '\u22c6',
-    'Star;': '\u22c6',
-    'star;': '\u2606',
-    'starf;': '\u2605',
-    'straightepsilon;': '\u03f5',
-    'straightphi;': '\u03d5',
-    'strns;': '\xaf',
-    'Sub;': '\u22d0',
-    'sub;': '\u2282',
-    'subdot;': '\u2abd',
-    'subE;': '\u2ac5',
-    'sube;': '\u2286',
-    'subedot;': '\u2ac3',
-    'submult;': '\u2ac1',
-    'subnE;': '\u2acb',
-    'subne;': '\u228a',
-    'subplus;': '\u2abf',
-    'subrarr;': '\u2979',
-    'Subset;': '\u22d0',
-    'subset;': '\u2282',
-    'subseteq;': '\u2286',
-    'subseteqq;': '\u2ac5',
-    'SubsetEqual;': '\u2286',
-    'subsetneq;': '\u228a',
-    'subsetneqq;': '\u2acb',
-    'subsim;': '\u2ac7',
-    'subsub;': '\u2ad5',
-    'subsup;': '\u2ad3',
-    'succ;': '\u227b',
-    'succapprox;': '\u2ab8',
-    'succcurlyeq;': '\u227d',
-    'Succeeds;': '\u227b',
-    'SucceedsEqual;': '\u2ab0',
-    'SucceedsSlantEqual;': '\u227d',
-    'SucceedsTilde;': '\u227f',
-    'succeq;': '\u2ab0',
-    'succnapprox;': '\u2aba',
-    'succneqq;': '\u2ab6',
-    'succnsim;': '\u22e9',
-    'succsim;': '\u227f',
-    'SuchThat;': '\u220b',
-    'Sum;': '\u2211',
-    'sum;': '\u2211',
-    'sung;': '\u266a',
-    'sup1': '\xb9',
-    'sup1;': '\xb9',
-    'sup2': '\xb2',
-    'sup2;': '\xb2',
-    'sup3': '\xb3',
-    'sup3;': '\xb3',
-    'Sup;': '\u22d1',
-    'sup;': '\u2283',
-    'supdot;': '\u2abe',
-    'supdsub;': '\u2ad8',
-    'supE;': '\u2ac6',
-    'supe;': '\u2287',
-    'supedot;': '\u2ac4',
-    'Superset;': '\u2283',
-    'SupersetEqual;': '\u2287',
-    'suphsol;': '\u27c9',
-    'suphsub;': '\u2ad7',
-    'suplarr;': '\u297b',
-    'supmult;': '\u2ac2',
-    'supnE;': '\u2acc',
-    'supne;': '\u228b',
-    'supplus;': '\u2ac0',
-    'Supset;': '\u22d1',
-    'supset;': '\u2283',
-    'supseteq;': '\u2287',
-    'supseteqq;': '\u2ac6',
-    'supsetneq;': '\u228b',
-    'supsetneqq;': '\u2acc',
-    'supsim;': '\u2ac8',
-    'supsub;': '\u2ad4',
-    'supsup;': '\u2ad6',
-    'swarhk;': '\u2926',
-    'swArr;': '\u21d9',
-    'swarr;': '\u2199',
-    'swarrow;': '\u2199',
-    'swnwar;': '\u292a',
-    'szlig': '\xdf',
-    'szlig;': '\xdf',
-    'Tab;': '\t',
-    'target;': '\u2316',
-    'Tau;': '\u03a4',
-    'tau;': '\u03c4',
-    'tbrk;': '\u23b4',
-    'Tcaron;': '\u0164',
-    'tcaron;': '\u0165',
-    'Tcedil;': '\u0162',
-    'tcedil;': '\u0163',
-    'Tcy;': '\u0422',
-    'tcy;': '\u0442',
-    'tdot;': '\u20db',
-    'telrec;': '\u2315',
-    'Tfr;': '\U0001d517',
-    'tfr;': '\U0001d531',
-    'there4;': '\u2234',
-    'Therefore;': '\u2234',
-    'therefore;': '\u2234',
-    'Theta;': '\u0398',
-    'theta;': '\u03b8',
-    'thetasym;': '\u03d1',
-    'thetav;': '\u03d1',
-    'thickapprox;': '\u2248',
-    'thicksim;': '\u223c',
-    'ThickSpace;': '\u205f\u200a',
-    'thinsp;': '\u2009',
-    'ThinSpace;': '\u2009',
-    'thkap;': '\u2248',
-    'thksim;': '\u223c',
-    'THORN': '\xde',
-    'thorn': '\xfe',
-    'THORN;': '\xde',
-    'thorn;': '\xfe',
-    'Tilde;': '\u223c',
-    'tilde;': '\u02dc',
-    'TildeEqual;': '\u2243',
-    'TildeFullEqual;': '\u2245',
-    'TildeTilde;': '\u2248',
-    'times': '\xd7',
-    'times;': '\xd7',
-    'timesb;': '\u22a0',
-    'timesbar;': '\u2a31',
-    'timesd;': '\u2a30',
-    'tint;': '\u222d',
-    'toea;': '\u2928',
-    'top;': '\u22a4',
-    'topbot;': '\u2336',
-    'topcir;': '\u2af1',
-    'Topf;': '\U0001d54b',
-    'topf;': '\U0001d565',
-    'topfork;': '\u2ada',
-    'tosa;': '\u2929',
-    'tprime;': '\u2034',
-    'TRADE;': '\u2122',
-    'trade;': '\u2122',
-    'triangle;': '\u25b5',
-    'triangledown;': '\u25bf',
-    'triangleleft;': '\u25c3',
-    'trianglelefteq;': '\u22b4',
-    'triangleq;': '\u225c',
-    'triangleright;': '\u25b9',
-    'trianglerighteq;': '\u22b5',
-    'tridot;': '\u25ec',
-    'trie;': '\u225c',
-    'triminus;': '\u2a3a',
-    'TripleDot;': '\u20db',
-    'triplus;': '\u2a39',
-    'trisb;': '\u29cd',
-    'tritime;': '\u2a3b',
-    'trpezium;': '\u23e2',
-    'Tscr;': '\U0001d4af',
-    'tscr;': '\U0001d4c9',
-    'TScy;': '\u0426',
-    'tscy;': '\u0446',
-    'TSHcy;': '\u040b',
-    'tshcy;': '\u045b',
-    'Tstrok;': '\u0166',
-    'tstrok;': '\u0167',
-    'twixt;': '\u226c',
-    'twoheadleftarrow;': '\u219e',
-    'twoheadrightarrow;': '\u21a0',
-    'Uacute': '\xda',
-    'uacute': '\xfa',
-    'Uacute;': '\xda',
-    'uacute;': '\xfa',
-    'Uarr;': '\u219f',
-    'uArr;': '\u21d1',
-    'uarr;': '\u2191',
-    'Uarrocir;': '\u2949',
-    'Ubrcy;': '\u040e',
-    'ubrcy;': '\u045e',
-    'Ubreve;': '\u016c',
-    'ubreve;': '\u016d',
-    'Ucirc': '\xdb',
-    'ucirc': '\xfb',
-    'Ucirc;': '\xdb',
-    'ucirc;': '\xfb',
-    'Ucy;': '\u0423',
-    'ucy;': '\u0443',
-    'udarr;': '\u21c5',
-    'Udblac;': '\u0170',
-    'udblac;': '\u0171',
-    'udhar;': '\u296e',
-    'ufisht;': '\u297e',
-    'Ufr;': '\U0001d518',
-    'ufr;': '\U0001d532',
-    'Ugrave': '\xd9',
-    'ugrave': '\xf9',
-    'Ugrave;': '\xd9',
-    'ugrave;': '\xf9',
-    'uHar;': '\u2963',
-    'uharl;': '\u21bf',
-    'uharr;': '\u21be',
-    'uhblk;': '\u2580',
-    'ulcorn;': '\u231c',
-    'ulcorner;': '\u231c',
-    'ulcrop;': '\u230f',
-    'ultri;': '\u25f8',
-    'Umacr;': '\u016a',
-    'umacr;': '\u016b',
-    'uml': '\xa8',
-    'uml;': '\xa8',
-    'UnderBar;': '_',
-    'UnderBrace;': '\u23df',
-    'UnderBracket;': '\u23b5',
-    'UnderParenthesis;': '\u23dd',
-    'Union;': '\u22c3',
-    'UnionPlus;': '\u228e',
-    'Uogon;': '\u0172',
-    'uogon;': '\u0173',
-    'Uopf;': '\U0001d54c',
-    'uopf;': '\U0001d566',
-    'UpArrow;': '\u2191',
-    'Uparrow;': '\u21d1',
-    'uparrow;': '\u2191',
-    'UpArrowBar;': '\u2912',
-    'UpArrowDownArrow;': '\u21c5',
-    'UpDownArrow;': '\u2195',
-    'Updownarrow;': '\u21d5',
-    'updownarrow;': '\u2195',
-    'UpEquilibrium;': '\u296e',
-    'upharpoonleft;': '\u21bf',
-    'upharpoonright;': '\u21be',
-    'uplus;': '\u228e',
-    'UpperLeftArrow;': '\u2196',
-    'UpperRightArrow;': '\u2197',
-    'Upsi;': '\u03d2',
-    'upsi;': '\u03c5',
-    'upsih;': '\u03d2',
-    'Upsilon;': '\u03a5',
-    'upsilon;': '\u03c5',
-    'UpTee;': '\u22a5',
-    'UpTeeArrow;': '\u21a5',
-    'upuparrows;': '\u21c8',
-    'urcorn;': '\u231d',
-    'urcorner;': '\u231d',
-    'urcrop;': '\u230e',
-    'Uring;': '\u016e',
-    'uring;': '\u016f',
-    'urtri;': '\u25f9',
-    'Uscr;': '\U0001d4b0',
-    'uscr;': '\U0001d4ca',
-    'utdot;': '\u22f0',
-    'Utilde;': '\u0168',
-    'utilde;': '\u0169',
-    'utri;': '\u25b5',
-    'utrif;': '\u25b4',
-    'uuarr;': '\u21c8',
-    'Uuml': '\xdc',
-    'uuml': '\xfc',
-    'Uuml;': '\xdc',
-    'uuml;': '\xfc',
-    'uwangle;': '\u29a7',
-    'vangrt;': '\u299c',
-    'varepsilon;': '\u03f5',
-    'varkappa;': '\u03f0',
-    'varnothing;': '\u2205',
-    'varphi;': '\u03d5',
-    'varpi;': '\u03d6',
-    'varpropto;': '\u221d',
-    'vArr;': '\u21d5',
-    'varr;': '\u2195',
-    'varrho;': '\u03f1',
-    'varsigma;': '\u03c2',
-    'varsubsetneq;': '\u228a\ufe00',
-    'varsubsetneqq;': '\u2acb\ufe00',
-    'varsupsetneq;': '\u228b\ufe00',
-    'varsupsetneqq;': '\u2acc\ufe00',
-    'vartheta;': '\u03d1',
-    'vartriangleleft;': '\u22b2',
-    'vartriangleright;': '\u22b3',
-    'Vbar;': '\u2aeb',
-    'vBar;': '\u2ae8',
-    'vBarv;': '\u2ae9',
-    'Vcy;': '\u0412',
-    'vcy;': '\u0432',
-    'VDash;': '\u22ab',
-    'Vdash;': '\u22a9',
-    'vDash;': '\u22a8',
-    'vdash;': '\u22a2',
-    'Vdashl;': '\u2ae6',
-    'Vee;': '\u22c1',
-    'vee;': '\u2228',
-    'veebar;': '\u22bb',
-    'veeeq;': '\u225a',
-    'vellip;': '\u22ee',
-    'Verbar;': '\u2016',
-    'verbar;': '|',
-    'Vert;': '\u2016',
-    'vert;': '|',
-    'VerticalBar;': '\u2223',
-    'VerticalLine;': '|',
-    'VerticalSeparator;': '\u2758',
-    'VerticalTilde;': '\u2240',
-    'VeryThinSpace;': '\u200a',
-    'Vfr;': '\U0001d519',
-    'vfr;': '\U0001d533',
-    'vltri;': '\u22b2',
-    'vnsub;': '\u2282\u20d2',
-    'vnsup;': '\u2283\u20d2',
-    'Vopf;': '\U0001d54d',
-    'vopf;': '\U0001d567',
-    'vprop;': '\u221d',
-    'vrtri;': '\u22b3',
-    'Vscr;': '\U0001d4b1',
-    'vscr;': '\U0001d4cb',
-    'vsubnE;': '\u2acb\ufe00',
-    'vsubne;': '\u228a\ufe00',
-    'vsupnE;': '\u2acc\ufe00',
-    'vsupne;': '\u228b\ufe00',
-    'Vvdash;': '\u22aa',
-    'vzigzag;': '\u299a',
-    'Wcirc;': '\u0174',
-    'wcirc;': '\u0175',
-    'wedbar;': '\u2a5f',
-    'Wedge;': '\u22c0',
-    'wedge;': '\u2227',
-    'wedgeq;': '\u2259',
-    'weierp;': '\u2118',
-    'Wfr;': '\U0001d51a',
-    'wfr;': '\U0001d534',
-    'Wopf;': '\U0001d54e',
-    'wopf;': '\U0001d568',
-    'wp;': '\u2118',
-    'wr;': '\u2240',
-    'wreath;': '\u2240',
-    'Wscr;': '\U0001d4b2',
-    'wscr;': '\U0001d4cc',
-    'xcap;': '\u22c2',
-    'xcirc;': '\u25ef',
-    'xcup;': '\u22c3',
-    'xdtri;': '\u25bd',
-    'Xfr;': '\U0001d51b',
-    'xfr;': '\U0001d535',
-    'xhArr;': '\u27fa',
-    'xharr;': '\u27f7',
-    'Xi;': '\u039e',
-    'xi;': '\u03be',
-    'xlArr;': '\u27f8',
-    'xlarr;': '\u27f5',
-    'xmap;': '\u27fc',
-    'xnis;': '\u22fb',
-    'xodot;': '\u2a00',
-    'Xopf;': '\U0001d54f',
-    'xopf;': '\U0001d569',
-    'xoplus;': '\u2a01',
-    'xotime;': '\u2a02',
-    'xrArr;': '\u27f9',
-    'xrarr;': '\u27f6',
-    'Xscr;': '\U0001d4b3',
-    'xscr;': '\U0001d4cd',
-    'xsqcup;': '\u2a06',
-    'xuplus;': '\u2a04',
-    'xutri;': '\u25b3',
-    'xvee;': '\u22c1',
-    'xwedge;': '\u22c0',
-    'Yacute': '\xdd',
-    'yacute': '\xfd',
-    'Yacute;': '\xdd',
-    'yacute;': '\xfd',
-    'YAcy;': '\u042f',
-    'yacy;': '\u044f',
-    'Ycirc;': '\u0176',
-    'ycirc;': '\u0177',
-    'Ycy;': '\u042b',
-    'ycy;': '\u044b',
-    'yen': '\xa5',
-    'yen;': '\xa5',
-    'Yfr;': '\U0001d51c',
-    'yfr;': '\U0001d536',
-    'YIcy;': '\u0407',
-    'yicy;': '\u0457',
-    'Yopf;': '\U0001d550',
-    'yopf;': '\U0001d56a',
-    'Yscr;': '\U0001d4b4',
-    'yscr;': '\U0001d4ce',
-    'YUcy;': '\u042e',
-    'yucy;': '\u044e',
-    'yuml': '\xff',
-    'Yuml;': '\u0178',
-    'yuml;': '\xff',
-    'Zacute;': '\u0179',
-    'zacute;': '\u017a',
-    'Zcaron;': '\u017d',
-    'zcaron;': '\u017e',
-    'Zcy;': '\u0417',
-    'zcy;': '\u0437',
-    'Zdot;': '\u017b',
-    'zdot;': '\u017c',
-    'zeetrf;': '\u2128',
-    'ZeroWidthSpace;': '\u200b',
-    'Zeta;': '\u0396',
-    'zeta;': '\u03b6',
-    'Zfr;': '\u2128',
-    'zfr;': '\U0001d537',
-    'ZHcy;': '\u0416',
-    'zhcy;': '\u0436',
-    'zigrarr;': '\u21dd',
-    'Zopf;': '\u2124',
-    'zopf;': '\U0001d56b',
-    'Zscr;': '\U0001d4b5',
-    'zscr;': '\U0001d4cf',
-    'zwj;': '\u200d',
-    'zwnj;': '\u200c',
-}
-
+from html.entities import html5
 
 class EntitySubstitution(object):
     """The ability to substitute XML or HTML entities for certain characters."""
diff --git a/lib/bs4/diagnose.py b/lib/bs4/diagnose.py
index 500e92df..e079772e 100644
--- a/lib/bs4/diagnose.py
+++ b/lib/bs4/diagnose.py
@@ -4,7 +4,7 @@
 __license__ = "MIT"
 
 import cProfile
-from io import StringIO
+from io import BytesIO
 from html.parser import HTMLParser
 import bs4
 from bs4 import BeautifulSoup, __version__
@@ -59,21 +59,6 @@ def diagnose(data):
 
     if hasattr(data, 'read'):
         data = data.read()
-    elif data.startswith("http:") or data.startswith("https:"):
-        print(('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data))
-        print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
-        return
-    else:
-        try:
-            if os.path.exists(data):
-                print(('"%s" looks like a filename. Reading data from the file.' % data))
-                with open(data) as fp:
-                    data = fp.read()
-        except ValueError:
-            # This can happen on some platforms when the 'filename' is
-            # too long. Assume it's data and not a filename.
-            pass
-        print("")
 
     for parser in basic_parsers:
         print(("Trying to parse your markup with %s" % parser))
@@ -103,7 +88,13 @@ def lxml_trace(data, html=True, **kwargs):
        if False, lxml's XML parser will be used.
     """
     from lxml import etree
-    for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
+    recover = kwargs.pop('recover', True)
+    if isinstance(data, str):
+        data = data.encode("utf8")
+    reader = BytesIO(data)
+    for event, element in etree.iterparse(
+        reader, html=html, recover=recover, **kwargs
+    ):
         print(("%s, %4s, %s" % (event, element.tag, element.text)))
 
 class AnnouncingParser(HTMLParser):
diff --git a/lib/bs4/element.py b/lib/bs4/element.py
index 82a986e4..0aefe734 100644
--- a/lib/bs4/element.py
+++ b/lib/bs4/element.py
@@ -8,14 +8,8 @@ except ImportError as e:
 import re
 import sys
 import warnings
-try:
-    import soupsieve
-except ImportError as e:
-    soupsieve = None
-    warnings.warn(
-        'The soupsieve package is not installed. CSS selectors cannot be used.'
-    )
 
+from bs4.css import CSS
 from bs4.formatter import (
     Formatter,
     HTMLFormatter,
@@ -23,7 +17,6 @@ from bs4.formatter import (
 )
 
 DEFAULT_OUTPUT_ENCODING = "utf-8"
-PY3K = (sys.version_info[0] > 2)
 
 nonwhitespace_re = re.compile(r"\S+")
 
@@ -70,13 +63,13 @@ PYTHON_SPECIFIC_ENCODINGS = set([
     "string-escape",
     "string_escape",
 ])
-    
+
 
 class NamespacedAttribute(str):
     """A namespaced string (e.g. 'xml:lang') that remembers the namespace
     ('xml') and the name ('lang') that were used to create it.
     """
-    
+
     def __new__(cls, prefix, name=None, namespace=None):
         if not name:
             # This is the default namespace. Its name "has no value"
@@ -147,14 +140,19 @@ class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
             return match.group(1) + encoding
         return self.CHARSET_RE.sub(rewrite, self.original_value)
 
-    
+
 class PageElement(object):
     """Contains the navigational information for some part of the page:
     that is, its current location in the parse tree.
 
     NavigableString, Tag, etc. are all subclasses of PageElement.
     """
-   
+
+    # In general, we can't tell just by looking at an element whether
+    # it's contained in an XML document or an HTML document. But for
+    # Tags (q.v.) we can store this information at parse time.
+    known_xml = None
+
     def setup(self, parent=None, previous_element=None, next_element=None,
               previous_sibling=None, next_sibling=None):
         """Sets up the initial relations between this element and
@@ -164,7 +162,7 @@ class PageElement(object):
 
         :param previous_element: The element parsed immediately before
             this one.
-        
+
         :param next_element: The element parsed immediately before
             this one.
 
@@ -258,11 +256,11 @@ class PageElement(object):
     default = object()
     def _all_strings(self, strip=False, types=default):
         """Yield all strings of certain classes, possibly stripping them.
-        
+
         This is implemented differently in Tag and NavigableString.
         """
         raise NotImplementedError()
-   
+
     @property
     def stripped_strings(self):
         """Yield all strings in this PageElement, stripping them first.
@@ -295,11 +293,11 @@ class PageElement(object):
                     strip, types=types)])
     getText = get_text
     text = property(get_text)
-    
+
     def replace_with(self, *args):
-        """Replace this PageElement with one or more PageElements, keeping the 
+        """Replace this PageElement with one or more PageElements, keeping the
         rest of the tree the same.
-        
+
         :param args: One or more PageElements.
         :return: `self`, no longer part of the tree.
         """
@@ -411,7 +409,7 @@ class PageElement(object):
         This works the same way as `list.insert`.
 
         :param position: The numeric position that should be occupied
-           in `self.children` by the new PageElement. 
+           in `self.children` by the new PageElement.
         :param new_child: A PageElement.
         """
         if new_child is None:
@@ -497,13 +495,16 @@ class PageElement(object):
     def extend(self, tags):
         """Appends the given PageElements to this one's contents.
 
-        :param tags: A list of PageElements.
+        :param tags: A list of PageElements. If a single Tag is
+            provided instead, this PageElement's contents will be extended
+            with that Tag's contents.
         """
         if isinstance(tags, Tag):
-            # Calling self.append() on another tag's contents will change
-            # the list we're iterating over. Make a list that won't
-            # change.
-            tags = list(tags.contents)
+            tags = tags.contents
+        if isinstance(tags, list):
+            # Moving items around the tree may change their position in
+            # the original list. Make a list that won't change.
+            tags = list(tags)
         for tag in tags:
             self.append(tag)
 
@@ -544,7 +545,7 @@ class PageElement(object):
                 "Element has no parent, so 'after' has no meaning.")
         if any(x is self for x in args):
             raise ValueError("Can't insert an element after itself.")
-        
+
         offset = 0
         for successor in args:
             # Extract first so that the index won't be screwed up if they
@@ -555,7 +556,7 @@ class PageElement(object):
             parent.insert(index+1+offset, successor)
             offset += 1
 
-    def find_next(self, name=None, attrs={}, text=None, **kwargs):
+    def find_next(self, name=None, attrs={}, string=None, **kwargs):
         """Find the first PageElement that matches the given criteria and
         appears later in the document than this PageElement.
 
@@ -564,15 +565,15 @@ class PageElement(object):
 
         :param name: A filter on tag name.
         :param attrs: A dictionary of filters on attribute values.
-        :param text: A filter for a NavigableString with specific text.
+        :param string: A filter for a NavigableString with specific text.
         :kwargs: A dictionary of filters on attribute values.
         :return: A PageElement.
         :rtype: bs4.element.Tag | bs4.element.NavigableString
         """
-        return self._find_one(self.find_all_next, name, attrs, text, **kwargs)
+        return self._find_one(self.find_all_next, name, attrs, string, **kwargs)
     findNext = find_next  # BS3
 
-    def find_all_next(self, name=None, attrs={}, text=None, limit=None,
+    def find_all_next(self, name=None, attrs={}, string=None, limit=None,
                     **kwargs):
         """Find all PageElements that match the given criteria and appear
         later in the document than this PageElement.
@@ -582,16 +583,17 @@ class PageElement(object):
 
         :param name: A filter on tag name.
         :param attrs: A dictionary of filters on attribute values.
-        :param text: A filter for a NavigableString with specific text.
+        :param string: A filter for a NavigableString with specific text.
         :param limit: Stop looking after finding this many results.
         :kwargs: A dictionary of filters on attribute values.
         :return: A ResultSet containing PageElements.
         """
-        return self._find_all(name, attrs, text, limit, self.next_elements,
-                             **kwargs)
+        _stacklevel = kwargs.pop('_stacklevel', 2)
+        return self._find_all(name, attrs, string, limit, self.next_elements,
+                              _stacklevel=_stacklevel+1, **kwargs)
     findAllNext = find_all_next  # BS3
 
-    def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs):
+    def find_next_sibling(self, name=None, attrs={}, string=None, **kwargs):
         """Find the closest sibling to this PageElement that matches the
         given criteria and appears later in the document.
 
@@ -600,16 +602,16 @@ class PageElement(object):
 
         :param name: A filter on tag name.
         :param attrs: A dictionary of filters on attribute values.
-        :param text: A filter for a NavigableString with specific text.
+        :param string: A filter for a NavigableString with specific text.
         :kwargs: A dictionary of filters on attribute values.
         :return: A PageElement.
         :rtype: bs4.element.Tag | bs4.element.NavigableString
         """
-        return self._find_one(self.find_next_siblings, name, attrs, text,
+        return self._find_one(self.find_next_siblings, name, attrs, string,
                              **kwargs)
     findNextSibling = find_next_sibling  # BS3
 
-    def find_next_siblings(self, name=None, attrs={}, text=None, limit=None,
+    def find_next_siblings(self, name=None, attrs={}, string=None, limit=None,
                            **kwargs):
         """Find all siblings of this PageElement that match the given criteria
         and appear later in the document.
@@ -619,18 +621,21 @@ class PageElement(object):
 
         :param name: A filter on tag name.
         :param attrs: A dictionary of filters on attribute values.
-        :param text: A filter for a NavigableString with specific text.
+        :param string: A filter for a NavigableString with specific text.
         :param limit: Stop looking after finding this many results.
         :kwargs: A dictionary of filters on attribute values.
         :return: A ResultSet of PageElements.
         :rtype: bs4.element.ResultSet
         """
-        return self._find_all(name, attrs, text, limit,
-                              self.next_siblings, **kwargs)
+        _stacklevel = kwargs.pop('_stacklevel', 2)
+        return self._find_all(
+            name, attrs, string, limit,
+            self.next_siblings, _stacklevel=_stacklevel+1, **kwargs
+        )
     findNextSiblings = find_next_siblings   # BS3
     fetchNextSiblings = find_next_siblings  # BS2
 
-    def find_previous(self, name=None, attrs={}, text=None, **kwargs):
+    def find_previous(self, name=None, attrs={}, string=None, **kwargs):
         """Look backwards in the document from this PageElement and find the
         first PageElement that matches the given criteria.
 
@@ -639,16 +644,16 @@ class PageElement(object):
 
         :param name: A filter on tag name.
         :param attrs: A dictionary of filters on attribute values.
-        :param text: A filter for a NavigableString with specific text.
+        :param string: A filter for a NavigableString with specific text.
         :kwargs: A dictionary of filters on attribute values.
         :return: A PageElement.
         :rtype: bs4.element.Tag | bs4.element.NavigableString
         """
         return self._find_one(
-            self.find_all_previous, name, attrs, text, **kwargs)
+            self.find_all_previous, name, attrs, string, **kwargs)
     findPrevious = find_previous  # BS3
 
-    def find_all_previous(self, name=None, attrs={}, text=None, limit=None,
+    def find_all_previous(self, name=None, attrs={}, string=None, limit=None,
                         **kwargs):
         """Look backwards in the document from this PageElement and find all
         PageElements that match the given criteria.
@@ -658,18 +663,21 @@ class PageElement(object):
 
         :param name: A filter on tag name.
         :param attrs: A dictionary of filters on attribute values.
-        :param text: A filter for a NavigableString with specific text.
+        :param string: A filter for a NavigableString with specific text.
         :param limit: Stop looking after finding this many results.
         :kwargs: A dictionary of filters on attribute values.
         :return: A ResultSet of PageElements.
         :rtype: bs4.element.ResultSet
         """
-        return self._find_all(name, attrs, text, limit, self.previous_elements,
-                           **kwargs)
+        _stacklevel = kwargs.pop('_stacklevel', 2)
+        return self._find_all(
+            name, attrs, string, limit, self.previous_elements,
+            _stacklevel=_stacklevel+1, **kwargs
+        )
     findAllPrevious = find_all_previous  # BS3
     fetchPrevious = find_all_previous    # BS2
 
-    def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs):
+    def find_previous_sibling(self, name=None, attrs={}, string=None, **kwargs):
         """Returns the closest sibling to this PageElement that matches the
         given criteria and appears earlier in the document.
 
@@ -678,16 +686,16 @@ class PageElement(object):
 
         :param name: A filter on tag name.
         :param attrs: A dictionary of filters on attribute values.
-        :param text: A filter for a NavigableString with specific text.
+        :param string: A filter for a NavigableString with specific text.
         :kwargs: A dictionary of filters on attribute values.
         :return: A PageElement.
         :rtype: bs4.element.Tag | bs4.element.NavigableString
         """
-        return self._find_one(self.find_previous_siblings, name, attrs, text,
+        return self._find_one(self.find_previous_siblings, name, attrs, string,
                              **kwargs)
     findPreviousSibling = find_previous_sibling  # BS3
 
-    def find_previous_siblings(self, name=None, attrs={}, text=None,
+    def find_previous_siblings(self, name=None, attrs={}, string=None,
                                limit=None, **kwargs):
         """Returns all siblings to this PageElement that match the
         given criteria and appear earlier in the document.
@@ -697,14 +705,17 @@ class PageElement(object):
 
         :param name: A filter on tag name.
         :param attrs: A dictionary of filters on attribute values.
-        :param text: A filter for a NavigableString with specific text.
+        :param string: A filter for a NavigableString with specific text.
         :param limit: Stop looking after finding this many results.
         :kwargs: A dictionary of filters on attribute values.
         :return: A ResultSet of PageElements.
         :rtype: bs4.element.ResultSet
         """
-        return self._find_all(name, attrs, text, limit,
-                              self.previous_siblings, **kwargs)
+        _stacklevel = kwargs.pop('_stacklevel', 2)
+        return self._find_all(
+            name, attrs, string, limit,
+            self.previous_siblings, _stacklevel=_stacklevel+1, **kwargs
+        )
     findPreviousSiblings = find_previous_siblings   # BS3
     fetchPreviousSiblings = find_previous_siblings  # BS2
 
@@ -725,7 +736,7 @@ class PageElement(object):
         # NOTE: We can't use _find_one because findParents takes a different
         # set of arguments.
         r = None
-        l = self.find_parents(name, attrs, 1, **kwargs)
+        l = self.find_parents(name, attrs, 1, _stacklevel=3, **kwargs)
         if l:
             r = l[0]
         return r
@@ -745,8 +756,9 @@ class PageElement(object):
         :return: A PageElement.
         :rtype: bs4.element.Tag | bs4.element.NavigableString
         """
+        _stacklevel = kwargs.pop('_stacklevel', 2)
         return self._find_all(name, attrs, None, limit, self.parents,
-                             **kwargs)
+                              _stacklevel=_stacklevel+1, **kwargs)
     findParents = find_parents   # BS3
     fetchParents = find_parents  # BS2
 
@@ -770,26 +782,30 @@ class PageElement(object):
 
     #These methods do the real heavy lifting.
 
-    def _find_one(self, method, name, attrs, text, **kwargs):
+    def _find_one(self, method, name, attrs, string, **kwargs):
         r = None
-        l = method(name, attrs, text, 1, **kwargs)
+        l = method(name, attrs, string, 1, _stacklevel=4, **kwargs)
         if l:
             r = l[0]
         return r
 
-    def _find_all(self, name, attrs, text, limit, generator, **kwargs):
+    def _find_all(self, name, attrs, string, limit, generator, **kwargs):
         "Iterates over a generator looking for things that match."
+        _stacklevel = kwargs.pop('_stacklevel', 3)
 
-        if text is None and 'string' in kwargs:
-            text = kwargs['string']
-            del kwargs['string']
+        if string is None and 'text' in kwargs:
+            string = kwargs.pop('text')
+            warnings.warn(
+                "The 'text' argument to find()-type methods is deprecated. Use 'string' instead.",
+                DeprecationWarning, stacklevel=_stacklevel
+            )
 
         if isinstance(name, SoupStrainer):
             strainer = name
         else:
-            strainer = SoupStrainer(name, attrs, text, **kwargs)
+            strainer = SoupStrainer(name, attrs, string, **kwargs)
 
-        if text is None and not limit and not attrs and not kwargs:
+        if string is None and not limit and not attrs and not kwargs:
             if name is True or name is None:
                 # Optimization to find all tags.
                 result = (element for element in generator
@@ -895,7 +911,7 @@ class PageElement(object):
         :rtype: bool
         """
         return getattr(self, '_decomposed', False) or False
-            
+   
     # Old non-property versions of the generators, for backwards
     # compatibility with BS3.
     def nextGenerator(self):
@@ -919,16 +935,11 @@ class NavigableString(str, PageElement):
 
     When Beautiful Soup parses the markup <b>penguin</b>, it will
     create a NavigableString for the string "penguin".
-    """   
+    """
 
     PREFIX = ''
     SUFFIX = ''
 
-    # We can't tell just by looking at a string whether it's contained
-    # in an XML document or an HTML document.
-
-    known_xml = None
-
     def __new__(cls, value):
         """Create a new NavigableString.
 
@@ -944,12 +955,22 @@ class NavigableString(str, PageElement):
         u.setup()
         return u
 
-    def __copy__(self):
+    def __deepcopy__(self, memo, recursive=False):
         """A copy of a NavigableString has the same contents and class
         as the original, but it is not connected to the parse tree.
+
+        :param recursive: This parameter is ignored; it's only defined
+           so that NavigableString.__deepcopy__ implements the same
+           signature as Tag.__deepcopy__.
         """
         return type(self)(self)
 
+    def __copy__(self):
+        """A copy of a NavigableString can only be a deep copy, because
+        only one PageElement can occupy a given place in a parse tree.
+        """
+        return self.__deepcopy__({})
+
     def __getnewargs__(self):
         return (str(self),)
 
@@ -1013,6 +1034,11 @@ class NavigableString(str, PageElement):
 
         # Do nothing if the caller is looking for specific types of
         # string, and we're of a different type.
+        #
+        # We check specific types instead of using isinstance(self,
+        # types) because all of these classes subclass
+        # NavigableString. Anyone who's using this feature probably
+        # wants generic NavigableStrings but not other stuff.
         my_type = type(self)
         if types is not None:
             if isinstance(types, type):
@@ -1037,10 +1063,10 @@ class PreformattedString(NavigableString):
     as comments (the Comment class) and CDATA blocks (the CData
     class).
     """
-    
+
     PREFIX = ''
     SUFFIX = ''
-    
+
     def output_ready(self, formatter=None):
         """Make this string ready for output by adding any subclass-specific
             prefix or suffix.
@@ -1122,7 +1148,7 @@ class Stylesheet(NavigableString):
     """
     pass
 
-    
+
 class Script(NavigableString):
     """A NavigableString representing an executable script (probably
     Javascript).
@@ -1141,6 +1167,27 @@ class TemplateString(NavigableString):
     pass
 
 
+class RubyTextString(NavigableString):
+    """A NavigableString representing the contents of the <rt> HTML
+    element.
+
+    https://dev.w3.org/html5/spec-LC/text-level-semantics.html#the-rt-element
+
+    Can be used to distinguish such strings from the strings they're
+    annotating.
+    """
+    pass
+
+
+class RubyParenthesisString(NavigableString):
+    """A NavigableString representing the contents of the <rp> HTML
+    element.
+
+    https://dev.w3.org/html5/spec-LC/text-level-semantics.html#the-rp-element
+    """
+    pass
+
+
 class Tag(PageElement):
     """Represents an HTML or XML tag that is part of a parse tree, along
     with its attributes and contents.
@@ -1155,6 +1202,7 @@ class Tag(PageElement):
                  can_be_empty_element=None, cdata_list_attributes=None,
                  preserve_whitespace_tags=None,
                  interesting_string_types=None,
+                 namespaces=None
     ):
         """Basic constructor.
 
@@ -1187,6 +1235,9 @@ class Tag(PageElement):
             to be considered. The default is to consider
             NavigableString and CData the only interesting string
             subtypes.
+        :param namespaces: A dictionary mapping currently active
+            namespace prefixes to URIs. This can be used later to
+            construct CSS selectors.
         """
         if parser is None:
             self.parser_class = None
@@ -1198,11 +1249,12 @@ class Tag(PageElement):
             raise ValueError("No value provided for new tag's name.")
         self.name = name
         self.namespace = namespace
+        self._namespaces = namespaces or {}
         self.prefix = prefix
         if ((not builder or builder.store_line_numbers)
             and (sourceline is not None or sourcepos is not None)):
             self.sourceline = sourceline
-            self.sourcepos = sourcepos        
+            self.sourcepos = sourcepos
         if attrs is None:
             attrs = {}
         elif attrs:
@@ -1260,27 +1312,62 @@ class Tag(PageElement):
                 self.interesting_string_types = builder.string_containers[self.name]
             else:
                 self.interesting_string_types = self.DEFAULT_INTERESTING_STRING_TYPES
-            
+
     parserClass = _alias("parser_class")  # BS3
 
-    def __copy__(self):
-        """A copy of a Tag is a new Tag, unconnected to the parse tree.
+    def __deepcopy__(self, memo, recursive=True):
+        """A deepcopy of a Tag is a new Tag, unconnected to the parse tree.
         Its contents are a copy of the old Tag's contents.
         """
+        clone = self._clone()
+
+        if recursive:
+            # Clone this tag's descendants recursively, but without
+            # making any recursive function calls.
+            tag_stack = [clone]
+            for event, element in self._event_stream(self.descendants):
+                if event is Tag.END_ELEMENT_EVENT:
+                    # Stop appending incoming Tags to the Tag that was
+                    # just closed.
+                    tag_stack.pop()
+                else:
+                    descendant_clone = element.__deepcopy__(
+                        memo, recursive=False
+                    )
+                    # Add to its parent's .contents
+                    tag_stack[-1].append(descendant_clone)
+
+                    if event is Tag.START_ELEMENT_EVENT:
+                        # Add the Tag itself to the stack so that its
+                        # children will be .appended to it.
+                        tag_stack.append(descendant_clone)
+        return clone
+
+    def __copy__(self):
+        """A copy of a Tag must always be a deep copy, because a Tag's
+        children can only have one parent at a time.
+        """
+        return self.__deepcopy__({})
+
+    def _clone(self):
+        """Create a new Tag just like this one, but with no
+        contents and unattached to any parse tree.
+
+        This is the first step in the deepcopy process.
+        """
         clone = type(self)(
-            None, self.builder, self.name, self.namespace,
+            None, None, self.name, self.namespace,
             self.prefix, self.attrs, is_xml=self._is_xml,
             sourceline=self.sourceline, sourcepos=self.sourcepos,
             can_be_empty_element=self.can_be_empty_element,
             cdata_list_attributes=self.cdata_list_attributes,
-            preserve_whitespace_tags=self.preserve_whitespace_tags
+            preserve_whitespace_tags=self.preserve_whitespace_tags,
+            interesting_string_types=self.interesting_string_types
         )
         for attr in ('can_be_empty_element', 'hidden'):
             setattr(clone, attr, getattr(self, attr))
-        for child in self.contents:
-            clone.append(child.__copy__())
         return clone
-
+    
     @property
     def is_empty_element(self):
         """Is this tag an empty-element tag? (aka a self-closing tag)
@@ -1384,7 +1471,7 @@ class Tag(PageElement):
             i.contents = []
             i._decomposed = True
             i = n
-           
+
     def clear(self, decompose=False):
         """Wipe out all children of this PageElement by calling extract()
            on them.
@@ -1472,7 +1559,7 @@ class Tag(PageElement):
         if not isinstance(value, list):
             value = [value]
         return value
-    
+
     def has_attr(self, key):
         """Does this PageElement have an attribute with the given name?"""
         return key in self.attrs
@@ -1524,7 +1611,8 @@ class Tag(PageElement):
             warnings.warn(
                 '.%(name)sTag is deprecated, use .find("%(name)s") instead. If you really were looking for a tag called %(name)sTag, use .find("%(name)sTag")' % dict(
                     name=tag_name
-                )
+                ),
+                DeprecationWarning, stacklevel=2
             )
             return self.find(tag_name)
         # We special case contents to avoid recursion.
@@ -1559,35 +1647,18 @@ class Tag(PageElement):
         """Renders this PageElement as a string.
 
         :param encoding: The encoding to use (Python 2 only).
-        :return: Under Python 2, a bytestring; under Python 3,
-            a Unicode string.
+            TODO: This is now ignored and a warning should be issued
+            if a value is provided.
+        :return: A (Unicode) string.
         """
-        if PY3K:
-            # "The return value must be a string object", i.e. Unicode
-            return self.decode()
-        else:
-            # "The return value must be a string object", i.e. a bytestring.
-            # By convention, the return value of __repr__ should also be
-            # an ASCII string.
-            return self.encode(encoding)
+        # "The return value must be a string object", i.e. Unicode
+        return self.decode()
 
     def __unicode__(self):
         """Renders this PageElement as a Unicode string."""
         return self.decode()
 
-    def __str__(self):
-        """Renders this PageElement as a generic string.
-
-        :return: Under Python 2, a UTF-8 bytestring; under Python 3,
-            a Unicode string.        
-        """
-        if PY3K:
-            return self.decode()
-        else:
-            return self.encode()
-
-    if PY3K:
-        __str__ = __repr__ = __unicode__
+    __str__ = __repr__ = __unicode__
 
     def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
                indent_level=None, formatter="minimal",
@@ -1597,8 +1668,10 @@ class Tag(PageElement):
 
         :param encoding: The destination encoding.
         :param indent_level: Each line of the rendering will be
-            indented this many spaces. Used internally in
-            recursive calls while pretty-printing.
+           indented this many levels. (The formatter decides what a
+           'level' means in terms of spaces or other characters
+           output.) Used internally in recursive calls while
+           pretty-printing.
         :param formatter: A Formatter object, or a string naming one of
             the standard formatters.
         :param errors: An error handling strategy such as
@@ -1615,106 +1688,217 @@ class Tag(PageElement):
 
     def decode(self, indent_level=None,
                eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
-        """Render a Unicode representation of this PageElement and its
-        contents.
-
-        :param indent_level: Each line of the rendering will be
-             indented this many spaces. Used internally in
-             recursive calls while pretty-printing.
-        :param eventual_encoding: The tag is destined to be
-            encoded into this encoding. This method is _not_
-            responsible for performing that encoding. This information
-            is passed in so that it can be substituted in if the
-            document contains a <META> tag that mentions the document's
-            encoding.
-        :param formatter: A Formatter object, or a string naming one of
-            the standard formatters.
-        """
-
+               formatter="minimal",
+               iterator=None):
+        pieces = []
         # First off, turn a non-Formatter `formatter` into a Formatter
         # object. This will stop the lookup from happening over and
         # over again.
         if not isinstance(formatter, Formatter):
             formatter = self.formatter_for_name(formatter)
-        attributes = formatter.attributes(self)
-        attrs = []
-        for key, val in attributes:
-            if val is None:
-                decoded = key
+
+        if indent_level is True:
+            indent_level = 0
+
+        # The currently active tag that put us into string literal
+        # mode. Until this element is closed, children will be treated
+        # as string literals and not pretty-printed. String literal
+        # mode is turned on immediately after this tag begins, and
+        # turned off immediately before it's closed. This means there
+        # will be whitespace before and after the tag itself.
+        string_literal_tag = None
+
+        for event, element in self._event_stream(iterator):
+            if event in (Tag.START_ELEMENT_EVENT, Tag.EMPTY_ELEMENT_EVENT):
+                piece = element._format_tag(
+                    eventual_encoding, formatter, opening=True
+                )
+            elif event is Tag.END_ELEMENT_EVENT:
+                piece = element._format_tag(
+                    eventual_encoding, formatter, opening=False
+                )
+                if indent_level is not None:
+                    indent_level -= 1
             else:
-                if isinstance(val, list) or isinstance(val, tuple):
-                    val = ' '.join(val)
-                elif not isinstance(val, str):
-                    val = str(val)
-                elif (
-                        isinstance(val, AttributeValueWithCharsetSubstitution)
-                        and eventual_encoding is not None
-                ):
-                    val = val.encode(eventual_encoding)
+                piece = element.output_ready(formatter)
 
-                text = formatter.attribute_value(val)
-                decoded = (
-                    str(key) + '='
-                    + formatter.quoted_attribute_value(text))
-            attrs.append(decoded)
-        close = ''
-        closeTag = ''
+            # Now we need to apply the 'prettiness' -- extra
+            # whitespace before and/or after this tag. This can get
+            # complicated because certain tags, like <pre> and
+            # <script>, can't be prettified, since adding whitespace would
+            # change the meaning of the content.
 
+            # The default behavior is to add whitespace before and
+            # after an element when string literal mode is off, and to
+            # leave things as they are when string literal mode is on.
+            if string_literal_tag:
+                indent_before = indent_after = False
+            else:
+                indent_before = indent_after = True
+
+            # The only time the behavior is more complex than that is
+            # when we encounter an opening or closing tag that might
+            # put us into or out of string literal mode.
+            if (event is Tag.START_ELEMENT_EVENT
+                and not string_literal_tag
+                and not element._should_pretty_print()):
+                    # We are about to enter string literal mode. Add
+                    # whitespace before this tag, but not after. We
+                    # will stay in string literal mode until this tag
+                    # is closed.
+                    indent_before = True
+                    indent_after = False
+                    string_literal_tag = element
+            elif (event is Tag.END_ELEMENT_EVENT
+                  and element is string_literal_tag):
+                # We are about to exit string literal mode by closing
+                # the tag that sent us into that mode. Add whitespace
+                # after this tag, but not before.
+                indent_before = False
+                indent_after = True
+                string_literal_tag = None
+
+            # Now we know whether to add whitespace before and/or
+            # after this element.
+            if indent_level is not None:
+                if (indent_before or indent_after):
+                    if isinstance(element, NavigableString):
+                        piece = piece.strip()
+                    if piece:
+                        piece = self._indent_string(
+                            piece, indent_level, formatter,
+                            indent_before, indent_after
+                        )
+                if event == Tag.START_ELEMENT_EVENT:
+                    indent_level += 1
+            pieces.append(piece)
+        return "".join(pieces)
+
+    # Names for the different events yielded by _event_stream
+    START_ELEMENT_EVENT = object()
+    END_ELEMENT_EVENT = object()
+    EMPTY_ELEMENT_EVENT = object()
+    STRING_ELEMENT_EVENT = object()
+
+    def _event_stream(self, iterator=None):
+        """Yield a sequence of events that can be used to reconstruct the DOM
+        for this element.
+
+        This lets us recreate the nested structure of this element
+        (e.g. when formatting it as a string) without using recursive
+        method calls.
+
+        This is similar in concept to the SAX API, but it's a simpler
+        interface designed for internal use. The events are different
+        from SAX and the arguments associated with the events are Tags
+        and other Beautiful Soup objects.
+
+        :param iterator: An alternate iterator to use when traversing
+         the tree.
+        """
+        tag_stack = []
+
+        iterator = iterator or self.self_and_descendants
+
+        for c in iterator:
+            # If the parent of the element we're about to yield is not
+            # the tag currently on the stack, it means that the tag on
+            # the stack closed before this element appeared.
+            while tag_stack and c.parent != tag_stack[-1]:
+                now_closed_tag = tag_stack.pop()
+                yield Tag.END_ELEMENT_EVENT, now_closed_tag
+
+            if isinstance(c, Tag):
+                if c.is_empty_element:
+                    yield Tag.EMPTY_ELEMENT_EVENT, c
+                else:
+                    yield Tag.START_ELEMENT_EVENT, c
+                    tag_stack.append(c)
+                    continue
+            else:
+                yield Tag.STRING_ELEMENT_EVENT, c
+
+        while tag_stack:
+            now_closed_tag = tag_stack.pop()
+            yield Tag.END_ELEMENT_EVENT, now_closed_tag
+
+    def _indent_string(self, s, indent_level, formatter,
+                       indent_before, indent_after):
+        """Add indentation whitespace before and/or after a string.
+
+        :param s: The string to amend with whitespace.
+        :param indent_level: The indentation level; affects how much
+           whitespace goes before the string.
+        :param formatter: A Formatter; its `indent` string is repeated
+           `indent_level` times to build the leading whitespace.
+        :param indent_before: Whether to add whitespace before the string.
+        :param indent_after: Whether to add a newline after the string.
+        """
+        space_before = ''
+        if indent_before and indent_level:
+            space_before = (formatter.indent * indent_level)
+
+        space_after = ''
+        if indent_after:
+            space_after = "\n"
+
+        return space_before + s + space_after
+
+    def _format_tag(self, eventual_encoding, formatter, opening):
+        if self.hidden:
+            # A hidden tag is invisible, although its contents
+            # are visible.
+            return ''
+
+        # A tag starts with the < character (see below).
+
+        # Then the / character, if this is a closing tag.
+        closing_slash = ''
+        if not opening:
+            closing_slash = '/'
+
+        # Then an optional namespace prefix.
         prefix = ''
         if self.prefix:
             prefix = self.prefix + ":"
 
-        if self.is_empty_element:
-            close = formatter.void_element_close_prefix or ''
-        else:
-            closeTag = '</%s%s>' % (prefix, self.name)
+        # Then a list of attribute values, if this is an opening tag.
+        attribute_string = ''
+        if opening:
+            attributes = formatter.attributes(self)
+            attrs = []
+            for key, val in attributes:
+                if val is None:
+                    decoded = key
+                else:
+                    if isinstance(val, list) or isinstance(val, tuple):
+                        val = ' '.join(val)
+                    elif not isinstance(val, str):
+                        val = str(val)
+                    elif (
+                            isinstance(val, AttributeValueWithCharsetSubstitution)
+                            and eventual_encoding is not None
+                    ):
+                        val = val.encode(eventual_encoding)
 
-        pretty_print = self._should_pretty_print(indent_level)
-        space = ''
-        indent_space = ''
-        if indent_level is not None:
-            indent_space = (' ' * (indent_level - 1))
-        if pretty_print:
-            space = indent_space
-            indent_contents = indent_level + 1
-        else:
-            indent_contents = None
-        contents = self.decode_contents(
-            indent_contents, eventual_encoding, formatter
-        )
-
-        if self.hidden:
-            # This is the 'document root' object.
-            s = contents
-        else:
-            s = []
-            attribute_string = ''
+                    text = formatter.attribute_value(val)
+                    decoded = (
+                        str(key) + '='
+                        + formatter.quoted_attribute_value(text))
+                attrs.append(decoded)
             if attrs:
                 attribute_string = ' ' + ' '.join(attrs)
-            if indent_level is not None:
-                # Even if this particular tag is not pretty-printed,
-                # we should indent up to the start of the tag.
-                s.append(indent_space)
-            s.append('<%s%s%s%s>' % (
-                    prefix, self.name, attribute_string, close))
-            if pretty_print:
-                s.append("\n")
-            s.append(contents)
-            if pretty_print and contents and contents[-1] != "\n":
-                s.append("\n")
-            if pretty_print and closeTag:
-                s.append(space)
-            s.append(closeTag)
-            if indent_level is not None and closeTag and self.next_sibling:
-                # Even if this particular tag is not pretty-printed,
-                # we're now done with the tag, and we should add a
-                # newline if appropriate.
-                s.append("\n")
-            s = ''.join(s)
-        return s
 
-    def _should_pretty_print(self, indent_level):
+        # Then an optional closing slash for an empty-element (void)
+        # tag; the formatter's void_element_close_prefix supplies it.
+        void_element_closing_slash = ''
+        if self.is_empty_element:
+            void_element_closing_slash = formatter.void_element_close_prefix or ''
+
+        # Put it all together.
+        return '<' + closing_slash + prefix + self.name + attribute_string + void_element_closing_slash + '>'
+
+    def _should_pretty_print(self, indent_level=1):
         """Should this tag be pretty-printed?
 
         Most of them should, but some (such as <pre> in HTML
@@ -1735,7 +1919,7 @@ class Tag(PageElement):
             a Unicode string will be returned.
         :param formatter: A Formatter object, or a string naming one of
             the standard formatters.
-        :return: A Unicode string (if encoding==None) or a bytestring 
+        :return: A Unicode string (if encoding==None) or a bytestring
             (otherwise).
         """
         if encoding is None:
@@ -1749,8 +1933,10 @@ class Tag(PageElement):
         """Renders the contents of this tag as a Unicode string.
 
         :param indent_level: Each line of the rendering will be
-           indented this many spaces. Used internally in
-           recursive calls while pretty-printing.
+           indented this many levels. (The formatter decides what a
+           'level' means in terms of spaces or other characters
+           output.) Used internally in recursive calls while
+           pretty-printing.
 
         :param eventual_encoding: The tag is destined to be
            encoded into this encoding. decode_contents() is _not_
@@ -1761,42 +1947,21 @@ class Tag(PageElement):
 
         :param formatter: A Formatter object, or a string naming one of
             the standard Formatters.
-        """
-        # First off, turn a string formatter into a Formatter object. This
-        # will stop the lookup from happening over and over again.
-        if not isinstance(formatter, Formatter):
-            formatter = self.formatter_for_name(formatter)
 
-        pretty_print = (indent_level is not None)
-        s = []
-        for c in self:
-            text = None
-            if isinstance(c, NavigableString):
-                text = c.output_ready(formatter)
-            elif isinstance(c, Tag):
-                s.append(c.decode(indent_level, eventual_encoding,
-                                  formatter))
-            preserve_whitespace = (
-                self.preserve_whitespace_tags and self.name in self.preserve_whitespace_tags
-            )
-            if text and indent_level and not preserve_whitespace:
-                text = text.strip()
-            if text:
-                if pretty_print and not preserve_whitespace:
-                    s.append(" " * (indent_level - 1))
-                s.append(text)
-                if pretty_print and not preserve_whitespace:
-                    s.append("\n")
-        return ''.join(s)
-       
+        """
+        return self.decode(indent_level, eventual_encoding, formatter,
+                           iterator=self.descendants)
+
     def encode_contents(
         self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
         formatter="minimal"):
         """Renders the contents of this PageElement as a bytestring.
 
         :param indent_level: Each line of the rendering will be
-           indented this many spaces. Used internally in
-           recursive calls while pretty-printing.
+           indented this many levels. (The formatter decides what a
+           'level' means in terms of spaces or other characters
+           output.) Used internally in recursive calls while
+           pretty-printing.
 
         :param eventual_encoding: The bytestring will be in this encoding.
 
@@ -1819,7 +1984,7 @@ class Tag(PageElement):
 
     #Soup methods
 
-    def find(self, name=None, attrs={}, recursive=True, text=None,
+    def find(self, name=None, attrs={}, recursive=True, string=None,
              **kwargs):
         """Look in the children of this PageElement and find the first
         PageElement that matches the given criteria.
@@ -1838,13 +2003,14 @@ class Tag(PageElement):
         :rtype: bs4.element.Tag | bs4.element.NavigableString
         """
         r = None
-        l = self.find_all(name, attrs, recursive, text, 1, **kwargs)
+        l = self.find_all(name, attrs, recursive, string, 1, _stacklevel=3,
+                          **kwargs)
         if l:
             r = l[0]
         return r
     findChild = find #BS2
 
-    def find_all(self, name=None, attrs={}, recursive=True, text=None,
+    def find_all(self, name=None, attrs={}, recursive=True, string=None,
                  limit=None, **kwargs):
         """Look in the children of this PageElement and find all
         PageElements that match the given criteria.
@@ -1865,7 +2031,9 @@ class Tag(PageElement):
         generator = self.descendants
         if not recursive:
             generator = self.children
-        return self._find_all(name, attrs, text, limit, generator, **kwargs)
+        _stacklevel = kwargs.pop('_stacklevel', 2)
+        return self._find_all(name, attrs, string, limit, generator,
+                              _stacklevel=_stacklevel+1, **kwargs)
     findAll = find_all       # BS3
     findChildren = find_all  # BS2
 
@@ -1879,6 +2047,18 @@ class Tag(PageElement):
         # return iter() to make the purpose of the method clear
         return iter(self.contents)  # XXX This seems to be untested.
 
+    @property
+    def self_and_descendants(self):
+        """Iterate over this PageElement and its children in a
+        breadth-first sequence.
+
+        :yield: A sequence of PageElements.
+        """
+        if not self.hidden:
+            yield self
+        for i in self.descendants:
+            yield i
+
     @property
     def descendants(self):
         """Iterate over all children of this PageElement in a
@@ -1905,16 +2085,13 @@ class Tag(PageElement):
            Beautiful Soup will use the prefixes it encountered while
            parsing the document.
 
-        :param kwargs: Keyword arguments to be passed into SoupSieve's 
+        :param kwargs: Keyword arguments to be passed into Soup Sieve's
            soupsieve.select() method.
 
         :return: A Tag.
         :rtype: bs4.element.Tag
         """
-        value = self.select(selector, namespaces, 1, **kwargs)
-        if value:
-            return value[0]
-        return None
+        return self.css.select_one(selector, namespaces, **kwargs)
 
     def select(self, selector, namespaces=None, limit=None, **kwargs):
         """Perform a CSS selection operation on the current element.
@@ -1930,27 +2107,18 @@ class Tag(PageElement):
 
         :param limit: After finding this number of results, stop looking.
 
-        :param kwargs: Keyword arguments to be passed into SoupSieve's 
+        :param kwargs: Keyword arguments to be passed into SoupSieve's
            soupsieve.select() method.
 
         :return: A ResultSet of Tags.
         :rtype: bs4.element.ResultSet
         """
-        if namespaces is None:
-            namespaces = self._namespaces
-        
-        if limit is None:
-            limit = 0
-        if soupsieve is None:
-            raise NotImplementedError(
-                "Cannot execute CSS selectors because the soupsieve package is not installed."
-            )
-            
-        results = soupsieve.select(selector, self, namespaces, limit, **kwargs)
+        return self.css.select(selector, namespaces, limit, **kwargs)
 
-        # We do this because it's more consistent and because
-        # ResultSet.__getattr__ has a helpful error message.
-        return ResultSet(None, results)
+    @property
+    def css(self):
+        """Return an interface to the CSS selector API."""
+        return CSS(self)
 
     # Old names for backwards compatibility
     def childGenerator(self):
@@ -1967,8 +2135,10 @@ class Tag(PageElement):
 
         has_key() is gone in Python 3, anyway.
         """
-        warnings.warn('has_key is deprecated. Use has_attr("%s") instead.' % (
-                key))
+        warnings.warn(
+            'has_key is deprecated. Use has_attr(key) instead.',
+            DeprecationWarning, stacklevel=2
+        )
         return self.has_attr(key)
 
 # Next, a couple classes to represent queries and their results.
@@ -1982,7 +2152,7 @@ class SoupStrainer(object):
     document.
     """
 
-    def __init__(self, name=None, attrs={}, text=None, **kwargs):
+    def __init__(self, name=None, attrs={}, string=None, **kwargs):
         """Constructor.
 
         The SoupStrainer constructor takes the same arguments passed
@@ -1991,9 +2161,16 @@ class SoupStrainer(object):
 
         :param name: A filter on tag name.
         :param attrs: A dictionary of filters on attribute values.
-        :param text: A filter for a NavigableString with specific text.
+        :param string: A filter for a NavigableString with specific text.
         :kwargs: A dictionary of filters on attribute values.
-        """        
+        """
+        if string is None and 'text' in kwargs:
+            string = kwargs.pop('text')
+            warnings.warn(
+                "The 'text' argument to the SoupStrainer constructor is deprecated. Use 'string' instead.",
+                DeprecationWarning, stacklevel=2
+            )
+
         self.name = self._normalize_search_value(name)
         if not isinstance(attrs, dict):
             # Treat a non-dict value for attrs as a search for the 'class'
@@ -2018,7 +2195,10 @@ class SoupStrainer(object):
             normalized_attrs[key] = self._normalize_search_value(value)
 
         self.attrs = normalized_attrs
-        self.text = self._normalize_search_value(text)
+        self.string = self._normalize_search_value(string)
+
+        # DEPRECATED but just in case someone is checking this.
+        self.text = self.string
 
     def _normalize_search_value(self, value):
         # Leave it alone if it's a Unicode string, a callable, a
@@ -2052,8 +2232,8 @@ class SoupStrainer(object):
 
     def __str__(self):
         """A human-readable representation of this SoupStrainer."""
-        if self.text:
-            return self.text
+        if self.string:
+            return self.string
         else:
             return "%s|%s" % (self.name, self.attrs)
 
@@ -2082,7 +2262,7 @@ class SoupStrainer(object):
             # looking at a tag with a different name.
             if markup and not markup.prefix and self.name != markup.name:
                  return False
-            
+
         call_function_with_tag_data = (
             isinstance(self.name, Callable)
             and not isinstance(markup_name, Tag))
@@ -2113,7 +2293,7 @@ class SoupStrainer(object):
                     found = markup
                 else:
                     found = markup_name
-        if found and self.text and not self._matches(found.string, self.text):
+        if found and self.string and not self._matches(found.string, self.string):
             found = None
         return found
 
@@ -2141,12 +2321,12 @@ class SoupStrainer(object):
         # If it's a Tag, make sure its name or attributes match.
         # Don't bother with Tags if we're searching for text.
         elif isinstance(markup, Tag):
-            if not self.text or self.name or self.attrs:
+            if not self.string or self.name or self.attrs:
                 found = self.search_tag(markup)
         # If it's text, make sure the text matches.
         elif isinstance(markup, NavigableString) or \
                  isinstance(markup, str):
-            if not self.name and not self.attrs and self._matches(markup, self.text):
+            if not self.name and not self.attrs and self._matches(markup, self.string):
                 found = markup
         else:
             raise Exception(
@@ -2168,7 +2348,7 @@ class SoupStrainer(object):
             if self._matches(' '.join(markup), match_against):
                 return True
             return False
-        
+
         if match_against is True:
             # True matches any non-None value.
             return markup is not None
@@ -2212,11 +2392,11 @@ class SoupStrainer(object):
                         return True
             else:
                 return False
-        
+
         # Beyond this point we might need to run the test twice: once against
         # the tag's name and once against its prefixed name.
         match = False
-        
+
         if not match and isinstance(match_against, str):
             # Exact string match
             match = markup == match_against
diff --git a/lib/bs4/formatter.py b/lib/bs4/formatter.py
index 3bd9f859..9fa1b57c 100644
--- a/lib/bs4/formatter.py
+++ b/lib/bs4/formatter.py
@@ -49,9 +49,9 @@ class Formatter(EntitySubstitution):
     def __init__(
             self, language=None, entity_substitution=None,
             void_element_close_prefix='/', cdata_containing_tags=None,
-            empty_attributes_are_booleans=False,
+            empty_attributes_are_booleans=False, indent=1,
     ):
-        """Constructor.
+        r"""Constructor.
 
         :param language: This should be Formatter.XML if you are formatting
            XML markup and Formatter.HTML if you are formatting HTML markup.
@@ -69,6 +69,15 @@ class Formatter(EntitySubstitution):
         :param blank_attributes_are_booleans: Render attributes whose value
             is the empty string as HTML-style boolean attributes.
             (Attributes whose value is None are always rendered this way.)
+
+        :param indent: If indent is a non-negative integer or string,
+            then the contents of elements will be indented
+            appropriately when pretty-printing. An indent level of 0,
+            negative, or "" will only insert newlines. Using a
+            positive integer indent indents that many spaces per
+            level. If indent is a string (such as "\t"), that string
+            is used to indent each level. The default behavior is to
+            indent one space per level.
         """
         self.language = language
         self.entity_substitution = entity_substitution
@@ -77,7 +86,18 @@ class Formatter(EntitySubstitution):
             language, cdata_containing_tags, 'cdata_containing_tags'
         )
         self.empty_attributes_are_booleans=empty_attributes_are_booleans
-        
+        if indent is None:
+            indent = 0
+        if isinstance(indent, int):
+            if indent < 0:
+                indent = 0
+            indent = ' ' * indent
+        elif isinstance(indent, str):
+            indent = indent
+        else:
+            indent = ' '
+        self.indent = indent
+
     def substitute(self, ns):
         """Process a string that needs to undergo entity substitution.
         This may be a string encountered in an attribute value or as
@@ -129,14 +149,14 @@ class HTMLFormatter(Formatter):
     """A generic Formatter for HTML."""
     REGISTRY = {}
     def __init__(self, *args, **kwargs):
-        return super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
+        super(HTMLFormatter, self).__init__(self.HTML, *args, **kwargs)
 
     
 class XMLFormatter(Formatter):
     """A generic Formatter for XML."""
     REGISTRY = {}
     def __init__(self, *args, **kwargs):
-        return super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)
+        super(XMLFormatter, self).__init__(self.XML, *args, **kwargs)
 
 
 # Set up aliases for the default formatters.
diff --git a/lib/bs4/testing.py b/lib/bs4/testing.py
deleted file mode 100644
index 0db19044..00000000
--- a/lib/bs4/testing.py
+++ /dev/null
@@ -1,680 +0,0 @@
-"""Helper classes for tests."""
-
-import pickle
-import copy
-import functools
-import unittest
-from unittest import TestCase
-from bs4 import BeautifulSoup
-from bs4.element import (
-    CharsetMetaAttributeValue,
-    Comment,
-    ContentMetaAttributeValue,
-    Doctype,
-    SoupStrainer,
-)
-
-from bs4.builder import HTMLParserTreeBuilder
-default_builder = HTMLParserTreeBuilder
-
-
-class SoupTest(unittest.TestCase):
-
-    @property
-    def default_builder(self):
-        return default_builder()
-
-    def soup(self, markup, **kwargs):
-        """Build a Beautiful Soup object from markup."""
-        builder = kwargs.pop('builder', self.default_builder)
-        return BeautifulSoup(markup, builder=builder, **kwargs)
-
-    def document_for(self, markup):
-        """Turn an HTML fragment into a document.
-
-        The details depend on the builder.
-        """
-        return self.default_builder.test_fragment_to_document(markup)
-
-    def assertSoupEquals(self, to_parse, compare_parsed_to=None):
-        builder = self.default_builder
-        obj = BeautifulSoup(to_parse, builder=builder)
-        if compare_parsed_to is None:
-            compare_parsed_to = to_parse
-
-        self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
-
-    def assertConnectedness(self, element):
-        """Ensure that next_element and previous_element are properly
-        set for all descendants of the given element.
-        """
-        earlier = None
-        for e in element.descendants:
-            if earlier:
-                self.assertEqual(e, earlier.next_element)
-                self.assertEqual(earlier, e.previous_element)
-            earlier = e
-
-class HTMLTreeBuilderSmokeTest(object):
-
-    """A basic test of a treebuilder's competence.
-
-    Any HTML treebuilder, present or future, should be able to pass
-    these tests. With invalid markup, there's room for interpretation,
-    and different parsers can handle it differently. But with the
-    markup in these tests, there's not much room for interpretation.
-    """
-
-    def test_pickle_and_unpickle_identity(self):
-        # Pickling a tree, then unpickling it, yields a tree identical
-        # to the original.
-        tree = self.soup("<a><b>foo</a>")
-        dumped = pickle.dumps(tree, 2)
-        loaded = pickle.loads(dumped)
-        self.assertEqual(loaded.__class__, BeautifulSoup)
-        self.assertEqual(loaded.decode(), tree.decode())
-
-    def assertDoctypeHandled(self, doctype_fragment):
-        """Assert that a given doctype string is handled correctly."""
-        doctype_str, soup = self._document_with_doctype(doctype_fragment)
-
-        # Make sure a Doctype object was created.
-        doctype = soup.contents[0]
-        self.assertEqual(doctype.__class__, Doctype)
-        self.assertEqual(doctype, doctype_fragment)
-        self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
-
-        # Make sure that the doctype was correctly associated with the
-        # parse tree and that the rest of the document parsed.
-        self.assertEqual(soup.p.contents[0], 'foo')
-
-    def _document_with_doctype(self, doctype_fragment):
-        """Generate and parse a document with the given doctype."""
-        doctype = '<!DOCTYPE %s>' % doctype_fragment
-        markup = doctype + '\n<p>foo</p>'
-        soup = self.soup(markup)
-        return doctype, soup
-
-    def test_normal_doctypes(self):
-        """Make sure normal, everyday HTML doctypes are handled correctly."""
-        self.assertDoctypeHandled("html")
-        self.assertDoctypeHandled(
-            'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
-
-    def test_empty_doctype(self):
-        soup = self.soup("<!DOCTYPE>")
-        doctype = soup.contents[0]
-        self.assertEqual("", doctype.strip())
-
-    def test_public_doctype_with_url(self):
-        doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
-        self.assertDoctypeHandled(doctype)
-
-    def test_system_doctype(self):
-        self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
-
-    def test_namespaced_system_doctype(self):
-        # We can handle a namespaced doctype with a system ID.
-        self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
-
-    def test_namespaced_public_doctype(self):
-        # Test a namespaced doctype with a public id.
-        self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
-
-    def test_real_xhtml_document(self):
-        """A real XHTML document should come out more or less the same as it went in."""
-        markup = b"""<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head><title>Hello.</title></head>
-<body>Goodbye.</body>
-</html>"""
-        soup = self.soup(markup)
-        self.assertEqual(
-            soup.encode("utf-8").replace(b"\n", b""),
-            markup.replace(b"\n", b""))
-
-    def test_processing_instruction(self):
-        markup = b"""<?PITarget PIContent?>"""
-        soup = self.soup(markup)
-        self.assertEqual(markup, soup.encode("utf8"))
-
-    def test_deepcopy(self):
-        """Make sure you can copy the tree builder.
-
-        This is important because the builder is part of a
-        BeautifulSoup object, and we want to be able to copy that.
-        """
-        copy.deepcopy(self.default_builder)
-
-    def test_p_tag_is_never_empty_element(self):
-        """A <p> tag is never designated as an empty-element tag.
-
-        Even if the markup shows it as an empty-element tag, it
-        shouldn't be presented that way.
-        """
-        soup = self.soup("<p/>")
-        self.assertFalse(soup.p.is_empty_element)
-        self.assertEqual(str(soup.p), "<p></p>")
-
-    def test_unclosed_tags_get_closed(self):
-        """A tag that's not closed by the end of the document should be closed.
-
-        This applies to all tags except empty-element tags.
-        """
-        self.assertSoupEquals("<p>", "<p></p>")
-        self.assertSoupEquals("<b>", "<b></b>")
-
-        self.assertSoupEquals("<br>", "<br/>")
-
-    def test_br_is_always_empty_element_tag(self):
-        """A <br> tag is designated as an empty-element tag.
-
-        Some parsers treat <br></br> as one <br/> tag, some parsers as
-        two tags, but it should always be an empty-element tag.
-        """
-        soup = self.soup("<br></br>")
-        self.assertTrue(soup.br.is_empty_element)
-        self.assertEqual(str(soup.br), "<br/>")
-
-    def test_nested_formatting_elements(self):
-        self.assertSoupEquals("<em><em></em></em>")
-
-    def test_double_head(self):
-        html = '''<!DOCTYPE html>
-<html>
-<head>
-<title>Ordinary HEAD element test</title>
-</head>
-<script type="text/javascript">
-alert("Help!");
-</script>
-<body>
-Hello, world!
-</body>
-</html>
-'''
-        soup = self.soup(html)
-        self.assertEqual("text/javascript", soup.find('script')['type'])
-
-    def test_comment(self):
-        # Comments are represented as Comment objects.
-        markup = "<p>foo<!--foobar-->baz</p>"
-        self.assertSoupEquals(markup)
-
-        soup = self.soup(markup)
-        comment = soup.find(text="foobar")
-        self.assertEqual(comment.__class__, Comment)
-
-        # The comment is properly integrated into the tree.
-        foo = soup.find(text="foo")
-        self.assertEqual(comment, foo.next_element)
-        baz = soup.find(text="baz")
-        self.assertEqual(comment, baz.previous_element)
-
-    def test_preserved_whitespace_in_pre_and_textarea(self):
-        """Whitespace must be preserved in <pre> and <textarea> tags."""
-        self.assertSoupEquals("<pre>   </pre>")
-        self.assertSoupEquals("<textarea> woo  </textarea>")
-
-    def test_nested_inline_elements(self):
-        """Inline elements can be nested indefinitely."""
-        b_tag = "<b>Inside a B tag</b>"
-        self.assertSoupEquals(b_tag)
-
-        nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
-        self.assertSoupEquals(nested_b_tag)
-
-        double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
-        self.assertSoupEquals(nested_b_tag)
-
-    def test_nested_block_level_elements(self):
-        """Block elements can be nested."""
-        soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
-        blockquote = soup.blockquote
-        self.assertEqual(blockquote.p.b.string, 'Foo')
-        self.assertEqual(blockquote.b.string, 'Foo')
-
-    def test_correctly_nested_tables(self):
-        """One table can go inside another one."""
-        markup = ('<table id="1">'
-                  '<tr>'
-                  "<td>Here's another table:"
-                  '<table id="2">'
-                  '<tr><td>foo</td></tr>'
-                  '</table></td>')
-
-        self.assertSoupEquals(
-            markup,
-            '<table id="1"><tr><td>Here\'s another table:'
-            '<table id="2"><tr><td>foo</td></tr></table>'
-            '</td></tr></table>')
-
-        self.assertSoupEquals(
-            "<table><thead><tr><td>Foo</td></tr></thead>"
-            "<tbody><tr><td>Bar</td></tr></tbody>"
-            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
-
-    def test_deeply_nested_multivalued_attribute(self):
-        # html5lib can set the attributes of the same tag many times
-        # as it rearranges the tree. This has caused problems with
-        # multivalued attributes.
-        markup = '<table><div><div class="css"></div></div></table>'
-        soup = self.soup(markup)
-        self.assertEqual(["css"], soup.div.div['class'])
-
-    def test_multivalued_attribute_on_html(self):
-        # html5lib uses a different API to set the attributes ot the
-        # <html> tag. This has caused problems with multivalued
-        # attributes.
-        markup = '<html class="a b"></html>'
-        soup = self.soup(markup)
-        self.assertEqual(["a", "b"], soup.html['class'])
-
-    def test_angle_brackets_in_attribute_values_are_escaped(self):
-        self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
-
-    def test_entities_in_attributes_converted_to_unicode(self):
-        expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
-        self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
-        self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
-        self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
-        self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
-
-    def test_entities_in_text_converted_to_unicode(self):
-        expect = '<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
-        self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
-        self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
-        self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
-        self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)
-
-    def test_quot_entity_converted_to_quotation_mark(self):
-        self.assertSoupEquals("<p>I said &quot;good day!&quot;</p>",
-                              '<p>I said "good day!"</p>')
-
-    def test_out_of_range_entity(self):
-        expect = "\N{REPLACEMENT CHARACTER}"
-        self.assertSoupEquals("&#10000000000000;", expect)
-        self.assertSoupEquals("&#x10000000000000;", expect)
-        self.assertSoupEquals("&#1000000000;", expect)
-
-    def test_multipart_strings(self):
-        "Mostly to prevent a recurrence of a bug in the html5lib treebuilder."
-        soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
-        self.assertEqual("p", soup.h2.string.next_element.name)
-        self.assertEqual("p", soup.p.name)
-        self.assertConnectedness(soup)
-
-    def test_head_tag_between_head_and_body(self):
-        "Prevent recurrence of a bug in the html5lib treebuilder."
-        content = """<html><head></head>
-  <link></link>
-  <body>foo</body>
-</html>
-"""
-        soup = self.soup(content)
-        self.assertNotEqual(None, soup.html.body)
-        self.assertConnectedness(soup)
-
-    def test_multiple_copies_of_a_tag(self):
-        "Prevent recurrence of a bug in the html5lib treebuilder."
-        content = """<!DOCTYPE html>
-<html>
- <body>
-   <article id="a" >
-   <div><a href="1"></div>
-   <footer>
-     <a href="2"></a>
-   </footer>
-  </article>
-  </body>
-</html>
-"""
-        soup = self.soup(content)
-        self.assertConnectedness(soup.article)
-
-    def test_basic_namespaces(self):
-        """Parsers don't need to *understand* namespaces, but at the
-        very least they should not choke on namespaces or lose
-        data."""
-
-        markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
-        soup = self.soup(markup)
-        self.assertEqual(markup, soup.encode())
-        html = soup.html
-        self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns'])
-        self.assertEqual(
-            'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml'])
-        self.assertEqual(
-            'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])
-
-    def test_multivalued_attribute_value_becomes_list(self):
-        markup = b'<a class="foo bar">'
-        soup = self.soup(markup)
-        self.assertEqual(['foo', 'bar'], soup.a['class'])
-
-    #
-    # Generally speaking, tests below this point are more tests of
-    # Beautiful Soup than tests of the tree builders. But parsers are
-    # weird, so we run these tests separately for every tree builder
-    # to detect any differences between them.
-    #
-
-    def test_can_parse_unicode_document(self):
-        # A seemingly innocuous document... but it's in Unicode! And
-        # it contains characters that can't be represented in the
-        # encoding found in the  declaration! The horror!
-        markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
-        soup = self.soup(markup)
-        self.assertEqual('Sacr\xe9 bleu!', soup.body.string)
-
-    def test_soupstrainer(self):
-        """Parsers should be able to work with SoupStrainers."""
-        strainer = SoupStrainer("b")
-        soup = self.soup("A <b>bold</b> <meta/> <i>statement</i>",
-                         parse_only=strainer)
-        self.assertEqual(soup.decode(), "<b>bold</b>")
-
-    def test_single_quote_attribute_values_become_double_quotes(self):
-        self.assertSoupEquals("<foo attr='bar'></foo>",
-                              '<foo attr="bar"></foo>')
-
-    def test_attribute_values_with_nested_quotes_are_left_alone(self):
-        text = """<foo attr='bar "brawls" happen'>a</foo>"""
-        self.assertSoupEquals(text)
-
-    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
-        text = """<foo attr='bar "brawls" happen'>a</foo>"""
-        soup = self.soup(text)
-        soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
-        self.assertSoupEquals(
-            soup.foo.decode(),
-            """<foo attr="Brawls happen at &quot;Bob\'s Bar&quot;">a</foo>""")
-
-    def test_ampersand_in_attribute_value_gets_escaped(self):
-        self.assertSoupEquals('<this is="really messed up & stuff"></this>',
-                              '<this is="really messed up &amp; stuff"></this>')
-
-        self.assertSoupEquals(
-            '<a href="http://example.org?a=1&b=2;3">foo</a>',
-            '<a href="http://example.org?a=1&amp;b=2;3">foo</a>')
-
-    def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
-        self.assertSoupEquals('<a href="http://example.org?a=1&amp;b=2;3"></a>')
-
-    def test_entities_in_strings_converted_during_parsing(self):
-        # Both XML and HTML entities are converted to Unicode characters
-        # during parsing.
-        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
-        expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
-        self.assertSoupEquals(text, expected)
-
-    def test_smart_quotes_converted_on_the_way_in(self):
-        # Microsoft smart quotes are converted to Unicode characters during
-        # parsing.
-        quote = b"<p>\x91Foo\x92</p>"
-        soup = self.soup(quote)
-        self.assertEqual(
-            soup.p.string,
-            "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
-
-    def test_non_breaking_spaces_converted_on_the_way_in(self):
-        soup = self.soup("<a>&nbsp;&nbsp;</a>")
-        self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2)
-
-    def test_entities_converted_on_the_way_out(self):
-        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
-        expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
-        soup = self.soup(text)
-        self.assertEqual(soup.p.encode("utf-8"), expected)
-
-    def test_real_iso_latin_document(self):
-        # Smoke test of interrelated functionality, using an
-        # easy-to-understand document.
-
-        # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
-        unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
-
-        # That's because we're going to encode it into ISO-Latin-1, and use
-        # that to test.
-        iso_latin_html = unicode_html.encode("iso-8859-1")
-
-        # Parse the ISO-Latin-1 HTML.
-        soup = self.soup(iso_latin_html)
-        # Encode it to UTF-8.
-        result = soup.encode("utf-8")
-
-        # What do we expect the result to look like? Well, it would
-        # look like unicode_html, except that the META tag would say
-        # UTF-8 instead of ISO-Latin-1.
-        expected = unicode_html.replace("ISO-Latin-1", "utf-8")
-
-        # And, of course, it would be in UTF-8, not Unicode.
-        expected = expected.encode("utf-8")
-
-        # Ta-da!
-        self.assertEqual(result, expected)
-
-    def test_real_shift_jis_document(self):
-        # Smoke test to make sure the parser can handle a document in
-        # Shift-JIS encoding, without choking.
-        shift_jis_html = (
-            b'<html><head></head><body><pre>'
-            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
-            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
-            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
-            b'</pre></body></html>')
-        unicode_html = shift_jis_html.decode("shift-jis")
-        soup = self.soup(unicode_html)
-
-        # Make sure the parse tree is correctly encoded to various
-        # encodings.
-        self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
-        self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))
-
-    def test_real_hebrew_document(self):
-        # A real-world test to make sure we can convert ISO-8859-9 (a
-        # Hebrew encoding) to UTF-8.
-        hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
-        soup = self.soup(
-            hebrew_document, from_encoding="iso8859-8")
-        self.assertEqual(soup.original_encoding, 'iso8859-8')
-        self.assertEqual(
-            soup.encode('utf-8'),
-            hebrew_document.decode("iso8859-8").encode("utf-8"))
-
-    def test_meta_tag_reflects_current_encoding(self):
-        # Here's the <meta> tag saying that a document is
-        # encoded in Shift-JIS.
-        meta_tag = ('<meta content="text/html; charset=x-sjis" '
-                    'http-equiv="Content-type"/>')
-
-        # Here's a document incorporating that meta tag.
-        shift_jis_html = (
-            '<html><head>\n%s\n'
-            '<meta http-equiv="Content-language" content="ja"/>'
-            '</head><body>Shift-JIS markup goes here.') % meta_tag
-        soup = self.soup(shift_jis_html)
-
-        # Parse the document, and the charset is seemingly unaffected.
-        parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
-        content = parsed_meta['content']
-        self.assertEqual('text/html; charset=x-sjis', content)
-
-        # But that value is actually a ContentMetaAttributeValue object.
-        self.assertTrue(isinstance(content, ContentMetaAttributeValue))
-
-        # And it will take on a value that reflects its current
-        # encoding.
-        self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
-
-        # For the rest of the story, see TestSubstitutions in
-        # test_tree.py.
-
-    def test_html5_style_meta_tag_reflects_current_encoding(self):
-        # Here's the <meta> tag saying that a document is
-        # encoded in Shift-JIS.
-        meta_tag = ('<meta id="encoding" charset="x-sjis" />')
-
-        # Here's a document incorporating that meta tag.
-        shift_jis_html = (
-            '<html><head>\n%s\n'
-            '<meta http-equiv="Content-language" content="ja"/>'
-            '</head><body>Shift-JIS markup goes here.') % meta_tag
-        soup = self.soup(shift_jis_html)
-
-        # Parse the document, and the charset is seemingly unaffected.
-        parsed_meta = soup.find('meta', id="encoding")
-        charset = parsed_meta['charset']
-        self.assertEqual('x-sjis', charset)
-
-        # But that value is actually a CharsetMetaAttributeValue object.
-        self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
-
-        # And it will take on a value that reflects its current
-        # encoding.
-        self.assertEqual('utf8', charset.encode("utf8"))
-
-    def test_tag_with_no_attributes_can_have_attributes_added(self):
-        data = self.soup("<a>text</a>")
-        data.a['foo'] = 'bar'
-        self.assertEqual('<a foo="bar">text</a>', data.a.decode())
-
-class XMLTreeBuilderSmokeTest(object):
-
-    def test_pickle_and_unpickle_identity(self):
-        # Pickling a tree, then unpickling it, yields a tree identical
-        # to the original.
-        tree = self.soup("<a><b>foo</a>")
-        dumped = pickle.dumps(tree, 2)
-        loaded = pickle.loads(dumped)
-        self.assertEqual(loaded.__class__, BeautifulSoup)
-        self.assertEqual(loaded.decode(), tree.decode())
-
-    def test_docstring_generated(self):
-        soup = self.soup("<root/>")
-        self.assertEqual(
-            soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
-
-    def test_real_xhtml_document(self):
-        """A real XHTML document should come out *exactly* the same as it went in."""
-        markup = b"""<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head><title>Hello.</title></head>
-<body>Goodbye.</body>
-</html>"""
-        soup = self.soup(markup)
-        self.assertEqual(
-            soup.encode("utf-8"), markup)
-
-    def test_formatter_processes_script_tag_for_xml_documents(self):
-        doc = """
-  <script type="text/javascript">
-  </script>
-"""
-        soup = BeautifulSoup(doc, "lxml-xml")
-        # lxml would have stripped this while parsing, but we can add
-        # it later.
-        soup.script.string = 'console.log("< < hey > > ");'
-        encoded = soup.encode()
-        self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
-
-    def test_can_parse_unicode_document(self):
-        markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
-        soup = self.soup(markup)
-        self.assertEqual('Sacr\xe9 bleu!', soup.root.string)
-
-    def test_popping_namespaced_tag(self):
-        markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
-        soup = self.soup(markup)
-        self.assertEqual(
-            str(soup.rss), markup)
-
-    def test_docstring_includes_correct_encoding(self):
-        soup = self.soup("<root/>")
-        self.assertEqual(
-            soup.encode("latin1"),
-            b'<?xml version="1.0" encoding="latin1"?>\n<root/>')
-
-    def test_large_xml_document(self):
-        """A large XML document should come out the same as it went in."""
-        markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
-                  + b'0' * (2**12)
-                  + b'</root>')
-        soup = self.soup(markup)
-        self.assertEqual(soup.encode("utf-8"), markup)
-
-
-    def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
-        self.assertSoupEquals("<p>", "<p/>")
-        self.assertSoupEquals("<p>foo</p>")
-
-    def test_namespaces_are_preserved(self):
-        markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
-        soup = self.soup(markup)
-        root = soup.root
-        self.assertEqual("http://example.com/", root['xmlns:a'])
-        self.assertEqual("http://example.net/", root['xmlns:b'])
-
-    def test_closing_namespaced_tag(self):
-        markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
-        soup = self.soup(markup)
-        self.assertEqual(str(soup.p), markup)
-
-    def test_namespaced_attributes(self):
-        markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
-        soup = self.soup(markup)
-        self.assertEqual(str(soup.foo), markup)
-
-    def test_namespaced_attributes_xml_namespace(self):
-        markup = '<foo xml:lang="fr">bar</foo>'
-        soup = self.soup(markup)
-        self.assertEqual(str(soup.foo), markup)
-
-class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
-    """Smoke test for a tree builder that supports HTML5."""
-
-    def test_real_xhtml_document(self):
-        # Since XHTML is not HTML5, HTML5 parsers are not tested to handle
-        # XHTML documents in any particular way.
-        pass
-
-    def test_html_tags_have_namespace(self):
-        markup = "<a>"
-        soup = self.soup(markup)
-        self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace)
-
-    def test_svg_tags_have_namespace(self):
-        markup = '<svg><circle/></svg>'
-        soup = self.soup(markup)
-        namespace = "http://www.w3.org/2000/svg"
-        self.assertEqual(namespace, soup.svg.namespace)
-        self.assertEqual(namespace, soup.circle.namespace)
-
-
-    def test_mathml_tags_have_namespace(self):
-        markup = '<math><msqrt>5</msqrt></math>'
-        soup = self.soup(markup)
-        namespace = 'http://www.w3.org/1998/Math/MathML'
-        self.assertEqual(namespace, soup.math.namespace)
-        self.assertEqual(namespace, soup.msqrt.namespace)
-
-    def test_xml_declaration_becomes_comment(self):
-        markup = '<?xml version="1.0" encoding="utf-8"?><html></html>'
-        soup = self.soup(markup)
-        self.assertTrue(isinstance(soup.contents[0], Comment))
-        self.assertEqual(soup.contents[0], '?xml version="1.0" encoding="utf-8"?')
-        self.assertEqual("html", soup.contents[0].next_element.name)
-
-def skipIf(condition, reason):
-   def nothing(test, *args, **kwargs):
-       return None
-
-   def decorator(test_item):
-       if condition:
-           return nothing
-       else:
-           return test_item
-
-   return decorator