From 961773238aec4f3aed9b6302f67afca7b6a699df Mon Sep 17 00:00:00 2001
From: Ade <ade_hall@yahoo.co.uk>
Date: Thu, 23 Aug 2012 12:48:17 +1200
Subject: [PATCH 1/6] rutracker search provider

Added rutracker as a torrent search provider.
BeautifulSoup 4 needs to be installed to parse the results.
---
 data/interfaces/default/config.html    |   14 +
 data/interfaces/default/history.html   |    2 +
 headphones/__init__.py                 |   15 +-
 headphones/searcher.py                 |   85 +-
 headphones/searcher_rutracker.py       |  253 ++++
 headphones/webserve.py                 |    8 +-
 lib/bs4/__init__.py                    |  355 +++++
 lib/bs4/builder/__init__.py            |  307 +++++
 lib/bs4/builder/_html5lib.py           |  222 ++++
 lib/bs4/builder/_htmlparser.py         |  244 ++++
 lib/bs4/builder/_lxml.py               |  179 +++
 lib/bs4/dammit.py                      |  792 +++++++++++
 lib/bs4/element.py                     | 1347 +++++++++++++++++++
 lib/bs4/testing.py                     |  515 +++++++
 lib/bs4/tests/__init__.py              |    1 +
 lib/bs4/tests/test_builder_registry.py |  141 ++
 lib/bs4/tests/test_docs.py             |   36 +
 lib/bs4/tests/test_html5lib.py         |   58 +
 lib/bs4/tests/test_htmlparser.py       |   19 +
 lib/bs4/tests/test_lxml.py             |   75 ++
 lib/bs4/tests/test_soup.py             |  368 +++++
 lib/bs4/tests/test_tree.py             | 1695 ++++++++++++++++++++++++
 22 files changed, 6716 insertions(+), 15 deletions(-)
 create mode 100644 headphones/searcher_rutracker.py
 create mode 100644 lib/bs4/__init__.py
 create mode 100644 lib/bs4/builder/__init__.py
 create mode 100644 lib/bs4/builder/_html5lib.py
 create mode 100644 lib/bs4/builder/_htmlparser.py
 create mode 100644 lib/bs4/builder/_lxml.py
 create mode 100644 lib/bs4/dammit.py
 create mode 100644 lib/bs4/element.py
 create mode 100644 lib/bs4/testing.py
 create mode 100644 lib/bs4/tests/__init__.py
 create mode 100644 lib/bs4/tests/test_builder_registry.py
 create mode 100644 lib/bs4/tests/test_docs.py
 create mode 100644 lib/bs4/tests/test_html5lib.py
 create mode 100644 lib/bs4/tests/test_htmlparser.py
 create mode 100644 lib/bs4/tests/test_lxml.py
 create mode 100644 lib/bs4/tests/test_soup.py
 create mode 100644 lib/bs4/tests/test_tree.py
diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html
index 75219f33..d599471b 100644
--- a/data/interfaces/default/config.html
+++ b/data/interfaces/default/config.html
@@ -302,6 +302,19 @@ m<%inherit file="base.html"/>
                                 <input type="text" name="waffles_passkey" value="${config['waffles_passkey']}" size="36">
                             </div>
                         </div>
+                        <div class="row checkbox">
+                            <input id="userutracker" type="checkbox" name="rutracker" onclick="initConfigCheckbox($(this));" value="1" ${config['use_rutracker']} /><label>rutracker.org</label>
+                        </div>
+                        <div class="config">
+                            <div class="row">
+                                <label>rutracker User Name: </label>
+                                <input type="text" name="rutracker_user" value="${config['rutracker_user']}" size="36">
+                            </div>
+                            <div class="row">
+                                <label>rutracker Password: </label>
+                                <input type="text" name="rutracker_password" value="${config['rutracker_password']}" size="36">
+                            </div>
+                        </div>
                     </fieldset>
                     
                 </td>
@@ -923,6 +936,7 @@ m<%inherit file="base.html"/>
 		initConfigCheckbox("#usenewzbin");
 		initConfigCheckbox("#usenzbsorg");
 		initConfigCheckbox("#usewaffles");
+		initConfigCheckbox("#userutracker");
 		initConfigCheckbox("#useblackhole");
 		initConfigCheckbox("#useapi");
 	}
diff --git a/data/interfaces/default/history.html b/data/interfaces/default/history.html
index 162dab2d..616459cc 100644
--- a/data/interfaces/default/history.html
+++ b/data/interfaces/default/history.html
@@ -45,6 +45,8 @@
 					fileid = 'nzb'
 				if item['URL'].find('torrent') != -1:
 					fileid = 'torrent'
+				if item['URL'].find('rutracker') != -1:
+					fileid = 'torrent'
 			%>
 			<tr class="grade${grade}">
 				<td id="dateadded">${item['DateAdded']}</td>
diff --git a/headphones/__init__.py b/headphones/__init__.py
index 3126e793..1157f26e 100644
--- a/headphones/__init__.py
+++ b/headphones/__init__.py
@@ -154,6 +154,9 @@ MININOVA = None
 WAFFLES = None
 WAFFLES_UID = None
 WAFFLES_PASSKEY = None
+RUTRACKER = None
+RUTRACKER_USER = None
+RUTRACKER_PASSWORD = None
 DOWNLOAD_TORRENT_DIR = None
 
 INTERFACE = None
@@ -247,7 +250,7 @@ def initialize():
                 LOSSLESS_DESTINATION_DIR, PREFERRED_QUALITY, PREFERRED_BITRATE, DETECT_BITRATE, ADD_ARTISTS, CORRECT_METADATA, MOVE_FILES, \
                 RENAME_FILES, FOLDER_FORMAT, FILE_FORMAT, CLEANUP_FILES, INCLUDE_EXTRAS, EXTRAS, AUTOWANT_UPCOMING, AUTOWANT_ALL, \
                 ADD_ALBUM_ART, EMBED_ALBUM_ART, EMBED_LYRICS, DOWNLOAD_DIR, BLACKHOLE, BLACKHOLE_DIR, USENET_RETENTION, SEARCH_INTERVAL, \
-                TORRENTBLACKHOLE_DIR, NUMBEROFSEEDERS, ISOHUNT, KAT, MININOVA, WAFFLES, WAFFLES_UID, WAFFLES_PASSKEY, DOWNLOAD_TORRENT_DIR, \
+                TORRENTBLACKHOLE_DIR, NUMBEROFSEEDERS, ISOHUNT, KAT, MININOVA, WAFFLES, WAFFLES_UID, WAFFLES_PASSKEY, RUTRACKER, RUTRACKER_USER, RUTRACKER_PASSWORD, DOWNLOAD_TORRENT_DIR, \
                 LIBRARYSCAN_INTERVAL, DOWNLOAD_SCAN_INTERVAL, SAB_HOST, SAB_USERNAME, SAB_PASSWORD, SAB_APIKEY, SAB_CATEGORY, \
                 NZBMATRIX, NZBMATRIX_USERNAME, NZBMATRIX_APIKEY, NEWZNAB, NEWZNAB_HOST, NEWZNAB_APIKEY, NEWZNAB_ENABLED, EXTRA_NEWZNABS,\
                 NZBSORG, NZBSORG_UID, NZBSORG_HASH, NEWZBIN, NEWZBIN_UID, NEWZBIN_PASSWORD, LASTFM_USERNAME, INTERFACE, FOLDER_PERMISSIONS, \
@@ -268,6 +271,7 @@ def initialize():
         CheckSection('NZBsorg')
         CheckSection('Newzbin')
         CheckSection('Waffles')
+        CheckSection('Rutracker')
         CheckSection('Prowl')
         CheckSection('XBMC')
         CheckSection('NMA')
@@ -341,6 +345,10 @@ def initialize():
         WAFFLES = bool(check_setting_int(CFG, 'Waffles', 'waffles', 0))
         WAFFLES_UID = check_setting_str(CFG, 'Waffles', 'waffles_uid', '')
         WAFFLES_PASSKEY = check_setting_str(CFG, 'Waffles', 'waffles_passkey', '')
+        
+        RUTRACKER = bool(check_setting_int(CFG, 'Rutracker', 'rutracker', 0))
+        RUTRACKER_USER = check_setting_str(CFG, 'Rutracker', 'rutracker_user', '')
+        RUTRACKER_PASSWORD = check_setting_str(CFG, 'Rutracker', 'rutracker_password', '')
 
         SAB_HOST = check_setting_str(CFG, 'SABnzbd', 'sab_host', '')
         SAB_USERNAME = check_setting_str(CFG, 'SABnzbd', 'sab_username', '')
@@ -618,6 +626,11 @@ def config_write():
     new_config['Waffles']['waffles'] = int(WAFFLES)
     new_config['Waffles']['waffles_uid'] = WAFFLES_UID
     new_config['Waffles']['waffles_passkey'] = WAFFLES_PASSKEY
+    
+    new_config['Rutracker'] = {}
+    new_config['Rutracker']['rutracker'] = int(RUTRACKER)
+    new_config['Rutracker']['rutracker_user'] = RUTRACKER_USER
+    new_config['Rutracker']['rutracker_password'] = RUTRACKER_PASSWORD
 
     new_config['General']['search_interval'] = SEARCH_INTERVAL
     new_config['General']['libraryscan_interval'] = LIBRARYSCAN_INTERVAL
diff --git a/headphones/searcher.py b/headphones/searcher.py
index 7470c790..643b6afa 100644
--- a/headphones/searcher.py
+++ b/headphones/searcher.py
@@ -28,6 +28,9 @@ from headphones import logger, db, helpers, classes, sab
 
 import lib.bencode as bencode
 
+import headphones.searcher_rutracker as rutrackersearch
+rutracker = rutrackersearch.Rutracker()
+
 class NewzbinDownloader(urllib.FancyURLopener):
 
     def __init__(self):
@@ -97,7 +100,7 @@ def searchforalbum(albumid=None, new=False, lossless=False):
                 else:
                     foundNZB = searchNZB(result['AlbumID'], new)
 
-            if (headphones.KAT or headphones.ISOHUNT or headphones.MININOVA or headphones.WAFFLES) and foundNZB == "none":
+            if (headphones.KAT or headphones.ISOHUNT or headphones.MININOVA or headphones.WAFFLES or headphones.RUTRACKER) and foundNZB == "none":
                 if result['Status'] == "Wanted Lossless":
                     searchTorrent(result['AlbumID'], new, losslessOnly=True)
                 else:
@@ -109,7 +112,7 @@ def searchforalbum(albumid=None, new=False, lossless=False):
         if (headphones.NZBMATRIX or headphones.NEWZNAB or headphones.NZBSORG or headphones.NEWZBIN) and (headphones.SAB_HOST or headphones.BLACKHOLE):
             foundNZB = searchNZB(albumid, new, lossless)
 
-        if (headphones.KAT or headphones.ISOHUNT or headphones.MININOVA or headphones.WAFFLES) and foundNZB == "none":
+        if (headphones.KAT or headphones.ISOHUNT or headphones.MININOVA or headphones.WAFFLES or headphones.RUTRACKER) and foundNZB == "none":
             searchTorrent(albumid, new, lossless)
 
 def searchNZB(albumid=None, new=False, losslessOnly=False):
@@ -632,6 +635,13 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
         results = myDB.select('SELECT ArtistName, AlbumTitle, AlbumID, ReleaseDate from albums WHERE Status="Wanted" OR Status="Wanted Lossless"')
         new = True
         
+    # rutracker login
+    
+    if headphones.RUTRACKER and results:
+        rulogin = rutracker.login(headphones.RUTRACKER_USER, headphones.RUTRACKER_PASSWORD)
+        if not rulogin:
+            logger.info(u'Could not login to rutracker, search results will exclude this provider')
+    
     for albums in results:
         
         albumid = albums[2]
@@ -806,7 +816,46 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
                         except Exception, e:
                             logger.error(u"An error occurred while trying to parse the response from Waffles.fm: %s" % e)
 
-
+        
+        # rutracker.org
+        
+        if headphones.RUTRACKER and rulogin:
+        
+            provider = "rutracker.org"
+            bitrate = False
+            
+            if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
+                format = 'lossless'
+                maxsize = 10000000000
+            elif headphones.PREFERRED_QUALITY == 1:
+                format = 'lossless+mp3'
+                maxsize = 10000000000
+            else:
+                format = 'mp3'
+                maxsize = 300000000
+                if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE:
+                    bitrate = True
+                
+            # build search url based on above
+            
+            searchURL = rutracker.searchurl(artistterm, albumterm, year, format)
+            logger.info(u'Parsing results from <a href="%s">rutracker.org</a>' % searchURL)
+            
+            # parse results and get best match
+            rulist = rutracker.search(searchURL, maxsize, minimumseeders, albumid, bitrate)
+            
+            # add best match to overall results list
+            
+            if rulist:
+                for ru in rulist:
+                    title = ru[0]
+                    size = ru[1]
+                    url = ru[2]
+                    resultlist.append((title, size, url, provider))
+                    logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
+            else:
+                logger.info(u"No valid results found from %s" % (provider))
+                
 
         if headphones.ISOHUNT:
             provider = "isoHunt"    
@@ -1029,19 +1078,24 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
                 
                     # Get torrent name from .torrent, this is usually used by the torrent client as the folder name
 
-
                     torrent_name = torrent_folder_name + '.torrent'
                     download_path = os.path.join(headphones.TORRENTBLACKHOLE_DIR, torrent_name)
                     try:
-			#Write the torrent file to a path derived from the TORRENTBLACKHOLE_DIR and file name.
-			torrent_file = open(download_path, 'wb')
-			torrent_file.write(data)
-			torrent_file.close()
-			#Open the fresh torrent file again so we can extract the proper torrent name
-			#Used later in post-processing.
-			torrent_file = open(download_path, 'rb')
+                        if bestqual[3] == 'rutracker.org':
+			                download_path = rutracker.get_torrent(bestqual[2], headphones.TORRENTBLACKHOLE_DIR)
+			                if not download_path:
+			                    break
+                        else:  
+			                #Write the torrent file to a path derived from the TORRENTBLACKHOLE_DIR and file name.
+			                torrent_file = open(download_path, 'wb')
+			                torrent_file.write(data)
+			                torrent_file.close()
+			                
+			            #Open the fresh torrent file again so we can extract the proper torrent name
+			            #Used later in post-processing.
+                        torrent_file = open(download_path, 'rb')
                         torrent_info = bencode.bdecode(torrent_file.read())
-			torrent_file.close()
+                        torrent_file.close()
                         torrent_folder_name = torrent_info['info'].get('name','').decode('utf-8')
                         logger.info('Torrent folder name: %s' % torrent_folder_name)
                     except Exception, e:
@@ -1058,7 +1112,12 @@ def preprocesstorrent(resultlist):
             selresult = result
         elif int(selresult[1]) < int(result[1]): # if size is lower than new result replace previous selected result (bigger size = better quality?)
             selresult = result
-            
+             
+    # get outta here if rutracker
+        
+    if selresult[3] == 'rutracker.org':
+        return True, selresult
+                   
     try:
         request = urllib2.Request(selresult[2])
         request.add_header('Accept-encoding', 'gzip')
diff --git a/headphones/searcher_rutracker.py b/headphones/searcher_rutracker.py
new file mode 100644
index 00000000..dcdd60a3
--- /dev/null
+++ b/headphones/searcher_rutracker.py
@@ -0,0 +1,253 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+# Headphones rutracker.org search
+# Functions called from searcher.py
+# Requires BeautifulSoup 4 for parsing http://www.crummy.com/software/BeautifulSoup/
+
+import urllib
+import urllib2
+import cookielib
+from urlparse import urlparse
+from bs4 import BeautifulSoup
+from headphones import logger, db
+import lib.bencode as bencode
+import os
+
+class Rutracker():
+    """Cookie-based search and download client for rutracker.org.
+
+    One urllib2 opener and cookie jar are shared by every request made through
+    an instance, so cookies set by login() are sent with the later search and
+    download requests.
+    """
+
+    logged_in = False
+    # Number of login attempts made so far; counted but not enforced.
+    login_counter = 0
+
+    # Title fragments that mark a search result as unwanted.
+    UNWANTED = ('Promo', 'promo', 'Vinyl', 'vinyl', 'ongbook', 'TVRip', 'HDTV', 'DVD')
+    # Title fragments that excuse a torrent for having extra tracks.
+    EXTENDED = ('eluxe', 'dition', 'apanese', 'elease')
+    # Lower-case file name endings that are counted as audio tracks.
+    AUDIO_EXTENSIONS = ('.ape', '.flac', '.ogg', '.m4a', '.aac', '.mp3', '.wav', '.aif', '.aiff')
+
+    def __init__(self):
+        """Create the cookie jar and the opener that uses it."""
+        self.cookiejar = cookielib.CookieJar()
+        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookiejar))
+        # NOTE: this also makes the opener the default for urllib2.urlopen().
+        urllib2.install_opener(self.opener)
+
+    def login(self, login, password):
+        """Log in to the tracker.
+
+        Returns True if the 'bb_data' cookie is in the cookie jar after the
+        attempt, otherwise False.  Missing or empty credentials return False
+        without contacting the tracker.
+        """
+        self.logged_in = False
+
+        if not login or not password:
+            return False
+
+        self.login_counter += 1
+
+        params = urllib.urlencode({"login_username" : login,
+                                   "login_password" : password,
+                                   "login" : "Вход"})
+
+        try:
+            self.opener.open("http://login.rutracker.org/forum/login.php", params).close()
+        except Exception, e:
+            # Best effort: the cookie check below decides the outcome.
+            logger.error('Error logging in to rutracker: %s' % e)
+
+        self.logged_in = any(cookie.name == 'bb_data' for cookie in self.cookiejar)
+        return self.logged_in
+
+    def searchurl(self, artist, album, year, format):
+        """Return the tracker search url for an album.
+
+        format is 'lossless', 'lossless+mp3' or anything else for mp3/aac.
+        """
+        searchterm = ' '.join((artist, album, year))
+        providerurl = "http://rutracker.org/forum/tracker.php"
+
+        if format == 'lossless':
+            formatterm = '+lossless'
+        elif format == 'lossless+mp3':
+            formatterm = '+lossless||mp3||aac'
+        else:
+            formatterm = '+mp3||aac'
+
+        # sort by size, descending
+        sort = '&o=7&s=2'
+        return "%s?nm=%s%s%s" % (providerurl, urllib.quote(searchterm), formatterm, sort)
+
+    def search(self, searchurl, maxsize, minseeders, albumid, bitrate):
+        """Parse a search results page and return the valid torrents.
+
+        A torrent is valid when it passes the title, size and seeder filters
+        and its audio file count equals the album's track count in the
+        headphones database (more files are accepted for titles that look
+        like deluxe/special editions).
+
+        Returns a list of (title, size, topic url) tuples holding the first
+        valid torrent, or every valid torrent when bitrate is true.  Returns
+        False when no results could be parsed, when the album has no tracks in
+        the database, or when fetching a torrent fails before anything valid
+        was found.
+        """
+        titles = []
+        urls = []
+        seedcounts = []
+        sizes = []
+        rulist = []
+
+        try:
+            page = self.opener.open(searchurl, timeout=60)
+            try:
+                soup = BeautifulSoup(page.read(), from_encoding="utf-8")
+            finally:
+                page.close()
+            # Each column is collected on its own and zipped into rows below.
+            for link in soup.find_all('a', attrs={'class' : 'med tLink bold'}):
+                titles.append(link.get_text())
+            for link in soup.find_all('a', attrs={'class' : 'small tr-dl dl-stub'}):
+                urls.append(link.get('href'))
+            for cell in soup.find_all('td', attrs={'class' : 'row4 seedmed'}):
+                seedcounts.append(cell.get_text())
+            for cell in soup.find_all('td', attrs={'class' : 'row4 small nowrap tor-size'}):
+                sizes.append(cell.u.string)
+        except Exception, e:
+            # Keep whatever was collected, but record why parsing stopped.
+            logger.error('Error parsing rutracker search results: %s' % e)
+
+        torrentlist = zip(titles, urls, seedcounts, sizes)
+        if not torrentlist:
+            return False
+
+        # Track count of the album according to headphones.
+        myDB = db.DBConnection()
+        tracks = myDB.select('SELECT TrackTitle from tracks WHERE AlbumID=?', [albumid])
+        hptrackcount = sum(1 for track in tracks)
+
+        if not hptrackcount:
+            logger.info('headphones track info not found, cannot compare to torrent')
+            return False
+
+        for title, url, seedcount, size in torrentlist:
+            # Skip rows whose size or seeder count is not a plain number.
+            try:
+                wanted = int(size) <= maxsize and int(seedcount) >= minseeders
+            except (TypeError, ValueError):
+                continue
+
+            if not wanted or any(word in title for word in self.UNWANTED):
+                continue
+
+            torrent_id = self._torrent_id(url)
+            if torrent_id is None:
+                logger.debug('No torrent id in rutracker url: %s' % url)
+                continue
+            self._set_download_cookie(torrent_id)
+
+            # Log cookie names only: the jar also holds the login cookie.
+            for cookie in self.cookiejar:
+                logger.debug('Cookie: %s' % cookie.name)
+
+            # Fetch the torrent itself so its file list can be inspected.
+            try:
+                page = self.opener.open(url)
+                try:
+                    metainfo = bencode.bdecode(page.read())['info']
+                finally:
+                    page.close()
+            except Exception, e:
+                logger.error('Error getting torrent: %s' % e)
+                # Do not throw away matches already collected in bitrate mode.
+                return rulist or False
+
+            # Count the audio files; only multi-file torrents have 'files'.
+            trackcount = 0
+            for entry in metainfo.get('files', []):
+                path = entry['path']
+                if path and path[-1].lower().endswith(self.AUDIO_EXTENSIONS):
+                    trackcount += 1
+
+            logger.debug('torrent title: %s' % title)
+            logger.debug('hp trackcount: %s' % hptrackcount)
+            logger.debug('torrent trackcount: %s' % trackcount)
+
+            # Same track count is a match; more tracks only for titles that
+            # look like deluxe/special/foreign editions.
+            if trackcount == hptrackcount:
+                valid = True
+            elif trackcount > hptrackcount:
+                valid = any(word in title for word in self.EXTENDED)
+            else:
+                valid = False
+
+            if valid:
+                topicurl = 'http://rutracker.org/forum/viewtopic.php?t=' + torrent_id
+                rulist.append((title, size, topicurl))
+                # Without a preferred bitrate the first match is enough.
+                if not bitrate:
+                    return rulist
+
+        return rulist
+
+    def _torrent_id(self, url):
+        """Return the value of the 't' query parameter of url, or None."""
+        if not url:
+            return None
+        for part in urlparse(url)[4].split('&'):
+            key, _, value = part.partition('=')
+            if key == 't' and value:
+                return value
+        return None
+
+    def _set_download_cookie(self, torrent_id):
+        """Store torrent_id in a 'bb_dl' cookie for .rutracker.org."""
+        self.cookiejar.set_cookie(cookielib.Cookie(
+            version=0, name='bb_dl', value=torrent_id, port=None,
+            port_specified=False, domain='.rutracker.org',
+            domain_specified=False, domain_initial_dot=False, path='/',
+            path_specified=True, secure=False, expires=None, discard=True,
+            comment=None, comment_url=None, rest={'HttpOnly': None},
+            rfc2109=False))
+
+    def get_torrent(self, url, savelocation):
+        """Download the torrent named by a topic url into savelocation.
+
+        Returns the path of the saved '<topic id>.torrent' file, or False when
+        the url carries no topic id or the download or write fails.
+        """
+        torrent_id = self._torrent_id(url)
+        if torrent_id is None:
+            logger.error('Error getting torrent: no topic id in %s' % url)
+            return False
+
+        self._set_download_cookie(torrent_id)
+        downloadurl = 'http://dl.rutracker.org/forum/dl.php?t=' + torrent_id
+        download_path = os.path.join(savelocation, torrent_id + '.torrent')
+
+        try:
+            page = self.opener.open(downloadurl)
+            try:
+                torrent = page.read()
+            finally:
+                page.close()
+            fp = open(download_path, 'wb')
+            try:
+                fp.write(torrent)
+            finally:
+                fp.close()
+        except Exception, e:
+            logger.error('Error getting torrent: %s' % e)
+            return False
+
+        return download_path
+        
diff --git a/headphones/webserve.py b/headphones/webserve.py
index 14f29a80..a6c3472a 100644
--- a/headphones/webserve.py
+++ b/headphones/webserve.py
@@ -463,6 +463,9 @@ class WebInterface(object):
                     "use_waffles" : checked(headphones.WAFFLES),
                     "waffles_uid" : headphones.WAFFLES_UID,
                     "waffles_passkey": headphones.WAFFLES_PASSKEY,
+                    "use_rutracker" : checked(headphones.RUTRACKER),
+                    "rutracker_user" : headphones.RUTRACKER_USER,
+                    "rutracker_password": headphones.RUTRACKER_PASSWORD,
                     "pref_qual_0" : radio(headphones.PREFERRED_QUALITY, 0),
                     "pref_qual_1" : radio(headphones.PREFERRED_QUALITY, 1),
                     "pref_qual_3" : radio(headphones.PREFERRED_QUALITY, 3),
@@ -544,7 +547,7 @@ class WebInterface(object):
         sab_category=None, download_dir=None, blackhole=0, blackhole_dir=None, usenet_retention=None, nzbmatrix=0, nzbmatrix_username=None, nzbmatrix_apikey=None, 
         newznab=0, newznab_host=None, newznab_apikey=None, newznab_enabled=0, nzbsorg=0, nzbsorg_uid=None, nzbsorg_hash=None, newzbin=0, newzbin_uid=None, 
         newzbin_password=None, preferred_quality=0, preferred_bitrate=None, detect_bitrate=0, move_files=0, torrentblackhole_dir=None, download_torrent_dir=None, 
-        numberofseeders=10, use_isohunt=0, use_kat=0, use_mininova=0, waffles=0, waffles_uid=None, waffles_passkey=None, rename_files=0, correct_metadata=0, 
+        numberofseeders=10, use_isohunt=0, use_kat=0, use_mininova=0, waffles=0, waffles_uid=None, waffles_passkey=None, rutracker=0, rutracker_user=None, rutracker_password=None, rename_files=0, correct_metadata=0, 
         cleanup_files=0, add_album_art=0, embed_album_art=0, embed_lyrics=0, destination_dir=None, lossless_destination_dir=None, folder_format=None, file_format=None, 
         include_extras=0, single=0, ep=0, compilation=0, soundtrack=0, live=0, remix=0, spokenword=0, audiobook=0, autowant_upcoming=False, autowant_all=False, 
         interface=None, log_dir=None, music_encoder=0, encoder=None, bitrate=None, samplingfrequency=None, encoderfolder=None, advancedencoder=None, 
@@ -594,6 +597,9 @@ class WebInterface(object):
         headphones.WAFFLES = waffles
         headphones.WAFFLES_UID = waffles_uid
         headphones.WAFFLES_PASSKEY = waffles_passkey
+        headphones.RUTRACKER = rutracker
+        headphones.RUTRACKER_USER = rutracker_user
+        headphones.RUTRACKER_PASSWORD = rutracker_password
         headphones.PREFERRED_QUALITY = int(preferred_quality)
         headphones.PREFERRED_BITRATE = preferred_bitrate
         headphones.PREFERRED_BITRATE_HIGH_BUFFER = preferred_bitrate_high_buffer
diff --git a/lib/bs4/__init__.py b/lib/bs4/__init__.py
new file mode 100644
index 00000000..af8c718d
--- /dev/null
+++ b/lib/bs4/__init__.py
@@ -0,0 +1,355 @@
+"""Beautiful Soup
+Elixir and Tonic
+"The Screen-Scraper's Friend"
+http://www.crummy.com/software/BeautifulSoup/
+
+Beautiful Soup uses a pluggable XML or HTML parser to parse a
+(possibly invalid) document into a tree representation. Beautiful Soup
+provides methods and Pythonic idioms that make it easy to
+navigate, search, and modify the parse tree.
+
+Beautiful Soup works with Python 2.6 and up. It works better if lxml
+and/or html5lib is installed.
+
+For more than you ever wanted to know about Beautiful Soup, see the
+documentation:
+http://www.crummy.com/software/BeautifulSoup/bs4/doc/
+"""
+
+__author__ = "Leonard Richardson (leonardr@segfault.org)"
+__version__ = "4.1.0"
+__copyright__ = "Copyright (c) 2004-2012 Leonard Richardson"
+__license__ = "MIT"
+
+__all__ = ['BeautifulSoup']
+
+import re
+import warnings
+
+from .builder import builder_registry
+from .dammit import UnicodeDammit
+from .element import (
+    CData,
+    Comment,
+    DEFAULT_OUTPUT_ENCODING,
+    Declaration,
+    Doctype,
+    NavigableString,
+    PageElement,
+    ProcessingInstruction,
+    ResultSet,
+    SoupStrainer,
+    Tag,
+    )
+
+# The very first thing we do is give a useful error if someone is
+# running this code under Python 3 without converting it.
+syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
+
+class BeautifulSoup(Tag):
+    """
+    This class defines the basic interface called by the tree builders.
+
+    These methods will be called by the parser:
+      reset()
+      feed(markup)
+
+    The tree builder may call these methods from its feed() implementation:
+      handle_starttag(name, attrs) # See note about return value
+      handle_endtag(name)
+      handle_data(data) # Appends to the current data node
+      endData(containerClass=NavigableString) # Ends the current data node
+
+    No matter how complicated the underlying parser is, you should be
+    able to build a tree using 'start tag' events, 'end tag' events,
+    'data' events, and "done with data" events.
+
+    If you encounter an empty-element tag (aka a self-closing tag,
+    like HTML's <br> tag), call handle_starttag and then
+    handle_endtag.
+    """
+    ROOT_TAG_NAME = u'[document]'
+
+    # If the end-user gives no indication which tree builder they
+    # want, look for one with these features.
+    DEFAULT_BUILDER_FEATURES = ['html', 'fast']
+
+    # Used when determining whether a text node is all whitespace and
+    # can be replaced with a single space. A text node that contains
+    # fancy Unicode spaces (usually non-breaking) should be left
+    # alone.
+    STRIP_ASCII_SPACES = {9: None, 10: None, 12: None, 13: None, 32: None, }
+
+    def __init__(self, markup="", features=None, builder=None,
+                 parse_only=None, from_encoding=None, **kwargs):
+        """The Soup object is initialized as the 'root tag', and the
+        provided markup (which can be a string or a file-like object)
+        is fed into the underlying parser."""
+
+        if 'convertEntities' in kwargs:
+            warnings.warn(
+                "BS4 does not respect the convertEntities argument to the "
+                "BeautifulSoup constructor. Entities are always converted "
+                "to Unicode characters.")
+
+        if 'markupMassage' in kwargs:
+            del kwargs['markupMassage']
+            warnings.warn(
+                "BS4 does not respect the markupMassage argument to the "
+                "BeautifulSoup constructor. The tree builder is responsible "
+                "for any necessary markup massage.")
+
+        if 'smartQuotesTo' in kwargs:
+            del kwargs['smartQuotesTo']
+            warnings.warn(
+                "BS4 does not respect the smartQuotesTo argument to the "
+                "BeautifulSoup constructor. Smart quotes are always converted "
+                "to Unicode characters.")
+
+        if 'selfClosingTags' in kwargs:
+            del kwargs['selfClosingTags']
+            warnings.warn(
+                "BS4 does not respect the selfClosingTags argument to the "
+                "BeautifulSoup constructor. The tree builder is responsible "
+                "for understanding self-closing tags.")
+
+        if 'isHTML' in kwargs:
+            del kwargs['isHTML']
+            warnings.warn(
+                "BS4 does not respect the isHTML argument to the "
+                "BeautifulSoup constructor. You can pass in features='html' "
+                "or features='xml' to get a builder capable of handling "
+                "one or the other.")
+
+        def deprecated_argument(old_name, new_name):
+            if old_name in kwargs:
+                warnings.warn(
+                    'The "%s" argument to the BeautifulSoup constructor '
+                    'has been renamed to "%s."' % (old_name, new_name))
+                value = kwargs[old_name]
+                del kwargs[old_name]
+                return value
+            return None
+
+        parse_only = parse_only or deprecated_argument(
+            "parseOnlyThese", "parse_only")
+
+        from_encoding = from_encoding or deprecated_argument(
+            "fromEncoding", "from_encoding")
+
+        if len(kwargs) > 0:
+            arg = kwargs.keys().pop()
+            raise TypeError(
+                "__init__() got an unexpected keyword argument '%s'" % arg)
+
+        if builder is None:
+            if isinstance(features, basestring):
+                features = [features]
+            if features is None or len(features) == 0:
+                features = self.DEFAULT_BUILDER_FEATURES
+            builder_class = builder_registry.lookup(*features)
+            if builder_class is None:
+                raise ValueError(
+                    "Couldn't find a tree builder with the features you "
+                    "requested: %s. Do you need to install a parser library?"
+                    % ",".join(features))
+            builder = builder_class()
+        self.builder = builder
+        self.is_xml = builder.is_xml
+        self.builder.soup = self
+
+        self.parse_only = parse_only
+
+        self.reset()
+
+        if hasattr(markup, 'read'):        # It's a file-type object.
+            markup = markup.read()
+        (self.markup, self.original_encoding, self.declared_html_encoding,
+         self.contains_replacement_characters) = (
+            self.builder.prepare_markup(markup, from_encoding))
+
+        try:
+            self._feed()
+        except StopParsing:
+            pass
+
+        # Clear out the markup and remove the builder's circular
+        # reference to this object.
+        self.markup = None
+        self.builder.soup = None
+
+    def _feed(self):
+        # Convert the document to Unicode.
+        self.builder.reset()
+
+        self.builder.feed(self.markup)
+        # Close out any unfinished strings and close all the open tags.
+        self.endData()
+        while self.currentTag.name != self.ROOT_TAG_NAME:
+            self.popTag()
+
+    def reset(self):
+        Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
+        self.hidden = 1
+        self.builder.reset()
+        self.currentData = []
+        self.currentTag = None
+        self.tagStack = []
+        self.pushTag(self)
+
+    def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
+        """Create a new tag associated with this soup."""
+        return Tag(None, self.builder, name, namespace, nsprefix, attrs)
+
+    def new_string(self, s):
+        """Create a new NavigableString associated with this soup."""
+        navigable = NavigableString(s)
+        navigable.setup()
+        return navigable
+
+    def insert_before(self, successor):
+        raise ValueError("BeautifulSoup objects don't support insert_before().")
+
+    def insert_after(self, successor):
+        raise ValueError("BeautifulSoup objects don't support insert_after().")
+
+    def popTag(self):
+        """Pop the innermost open tag and return the resulting currentTag."""
+        self.tagStack.pop()
+        if self.tagStack:
+            self.currentTag = self.tagStack[-1]
+        return self.currentTag
+
+    def pushTag(self, tag):
+        #print "Push", tag.name
+        if self.currentTag:
+            self.currentTag.contents.append(tag)
+        self.tagStack.append(tag)
+        self.currentTag = self.tagStack[-1]
+
+    def endData(self, containerClass=NavigableString):
+        if self.currentData:
+            currentData = u''.join(self.currentData)
+            if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
+                not set([tag.name for tag in self.tagStack]).intersection(
+                    self.builder.preserve_whitespace_tags)):
+                if '\n' in currentData:
+                    currentData = '\n'
+                else:
+                    currentData = ' '
+            self.currentData = []
+            if self.parse_only and len(self.tagStack) <= 1 and \
+                   (not self.parse_only.text or \
+                    not self.parse_only.search(currentData)):
+                return
+            o = containerClass(currentData)
+            self.object_was_parsed(o)
+
+    def object_was_parsed(self, o):
+        """Add an object to the parse tree."""
+        o.setup(self.currentTag, self.previous_element)
+        if self.previous_element:
+            self.previous_element.next_element = o
+        self.previous_element = o
+        self.currentTag.contents.append(o)
+
+    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
+        """Pops the tag stack up to and including the most recent
+        instance of the given tag. If inclusivePop is false, pops the tag
+        stack up to but *not* including the most recent instance of
+        the given tag. Returns the value of the last popTag() call, or
+        None if nothing was popped."""
+        if name == self.ROOT_TAG_NAME:
+            return
+
+        numPops = 0
+        mostRecentTag = None
+
+        for i in range(len(self.tagStack) - 1, 0, -1):
+            if (name == self.tagStack[i].name
+                and nsprefix == self.tagStack[i].nsprefix):
+                numPops = len(self.tagStack) - i
+                break
+        if not inclusivePop:
+            numPops = numPops - 1
+
+        for i in range(0, numPops):
+            mostRecentTag = self.popTag()
+        return mostRecentTag
+
+    def handle_starttag(self, name, namespace, nsprefix, attrs):
+        """Push a start tag on to the stack.
+
+        If this method returns None, the tag was rejected by the
+        SoupStrainer. You should proceed as if the tag had not occurred
+        in the document. For instance, if this was a self-closing tag,
+        don't call handle_endtag.
+        """
+
+        # Flush any pending text so it precedes this tag in the tree.
+        self.endData()
+
+        if (self.parse_only and len(self.tagStack) <= 1
+            and (self.parse_only.text
+                 or not self.parse_only.search_tag(name, attrs))):
+            return None
+
+        tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
+                  self.currentTag, self.previous_element)
+        # Link the new tag into the previous_element/next_element chain.
+        if self.previous_element:
+            self.previous_element.next_element = tag
+        self.previous_element = tag
+
+        self.pushTag(tag)
+        return tag
+
+    def handle_endtag(self, name, nsprefix=None):
+        #print "End tag: " + name
+        self.endData()
+        self._popToTag(name, nsprefix)
+
+    def handle_data(self, data):
+        self.currentData.append(data)
+
+    def decode(self, pretty_print=False,
+               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+               formatter="minimal"):
+        """Returns a string or Unicode representation of this document.
+        To get Unicode, pass None for encoding."""
+
+        if self.is_xml:
+            # Print the XML declaration
+            encoding_part = ''
+            if eventual_encoding != None:
+                encoding_part = ' encoding="%s"' % eventual_encoding
+            prefix = u'<?xml version="1.0"%s?>\n' % encoding_part
+        else:
+            prefix = u''
+        if not pretty_print:
+            indent_level = None
+        else:
+            indent_level = 0
+        return prefix + super(BeautifulSoup, self).decode(
+            indent_level, eventual_encoding, formatter)
+
+class BeautifulStoneSoup(BeautifulSoup):
+    """Deprecated interface to an XML parser."""
+
+    def __init__(self, *args, **kwargs):
+        kwargs['features'] = 'xml'
+        warnings.warn(
+            'The BeautifulStoneSoup class is deprecated. Instead of using '
+            'it, pass features="xml" into the BeautifulSoup constructor.')
+        super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
+
+
+class StopParsing(Exception):
+    pass
+
+
+#By default, act as an HTML pretty-printer.
+if __name__ == '__main__':
+    import sys
+    soup = BeautifulSoup(sys.stdin)
+    print soup.prettify()
diff --git a/lib/bs4/builder/__init__.py b/lib/bs4/builder/__init__.py
new file mode 100644
index 00000000..4c22b864
--- /dev/null
+++ b/lib/bs4/builder/__init__.py
@@ -0,0 +1,307 @@
+from collections import defaultdict
+import itertools
+import sys
+from bs4.element import (
+    CharsetMetaAttributeValue,
+    ContentMetaAttributeValue,
+    whitespace_re
+    )
+
+__all__ = [
+    'HTMLTreeBuilder',
+    'SAXTreeBuilder',
+    'TreeBuilder',
+    'TreeBuilderRegistry',
+    ]
+
+# Some useful features for a TreeBuilder to have.
+FAST = 'fast'
+PERMISSIVE = 'permissive'
+STRICT = 'strict'
+XML = 'xml'
+HTML = 'html'
+HTML_5 = 'html5'
+
+
+class TreeBuilderRegistry(object):
+
+    def __init__(self):
+        self.builders_for_feature = defaultdict(list)
+        self.builders = []
+
+    def register(self, treebuilder_class):
+        """Register a treebuilder based on its advertised features."""
+        for feature in treebuilder_class.features:
+            self.builders_for_feature[feature].insert(0, treebuilder_class)
+        self.builders.insert(0, treebuilder_class)
+
+    def lookup(self, *features):
+        if len(self.builders) == 0:
+            # There are no builders at all.
+            return None
+
+        if len(features) == 0:
+            # They didn't ask for any features. Give them the most
+            # recently registered builder.
+            return self.builders[0]
+
+        # Go down the list of features in order, and eliminate any builders
+        # that don't match every feature.
+        features = list(features)
+        features.reverse()
+        candidates = None
+        candidate_set = None
+        while len(features) > 0:
+            feature = features.pop()
+            we_have_the_feature = self.builders_for_feature.get(feature, [])
+            if len(we_have_the_feature) > 0:
+                if candidates is None:
+                    candidates = we_have_the_feature
+                    candidate_set = set(candidates)
+                else:
+                    # Eliminate any candidates that don't have this feature.
+                    candidate_set = candidate_set.intersection(
+                        set(we_have_the_feature))
+
+        # The only valid candidates are the ones in candidate_set.
+        # Go through the original list of candidates and pick the first one
+        # that's in candidate_set.
+        if candidate_set is None:
+            return None
+        for candidate in candidates:
+            if candidate in candidate_set:
+                return candidate
+        return None
+
+# The BeautifulSoup class will take feature lists from developers and use them
+# to look up builders in this registry.
+builder_registry = TreeBuilderRegistry()
+
+class TreeBuilder(object):
+    """Turn a document into a Beautiful Soup object tree."""
+
+    features = []
+
+    is_xml = False
+    preserve_whitespace_tags = set()
+    empty_element_tags = None # A tag will be considered an empty-element
+                              # tag when and only when it has no contents.
+
+    # A value for these tag/attribute combinations is a space- or
+    # comma-separated list of CDATA, rather than a single CDATA.
+    cdata_list_attributes = {}
+
+
+    def __init__(self):
+        self.soup = None
+
+    def reset(self):
+        pass
+
+    def can_be_empty_element(self, tag_name):
+        """Might a tag with this name be an empty-element tag?
+
+        The final markup may or may not actually present this tag as
+        self-closing.
+
+        For instance: an HTMLBuilder does not consider a <p> tag to be
+        an empty-element tag (it's not in
+        HTMLBuilder.empty_element_tags). This means an empty <p> tag
+        will be presented as "<p></p>", not "<p />".
+
+        The default implementation has no opinion about which tags are
+        empty-element tags, so a tag will be presented as an
+        empty-element tag if and only if it has no contents.
+        "<foo></foo>" will become "<foo />", and "<foo>bar</foo>" will
+        be left alone.
+        """
+        if self.empty_element_tags is None:
+            return True
+        return tag_name in self.empty_element_tags
+
+    def feed(self, markup):
+        raise NotImplementedError()
+
+    def prepare_markup(self, markup, user_specified_encoding=None,
+                       document_declared_encoding=None):
+        return markup, None, None, False
+
+    def test_fragment_to_document(self, fragment):
+        """Wrap an HTML fragment to make it look like a document.
+
+        Different parsers do this differently. For instance, lxml
+        introduces an empty <head> tag, and html5lib
+        doesn't. Abstracting this away lets us write simple tests
+        which run HTML fragments through the parser and compare the
+        results against other HTML fragments.
+
+        This method should not be used outside of tests.
+        """
+        return fragment
+
+    def set_up_substitutions(self, tag):
+        return False
+
+    def _replace_cdata_list_attribute_values(self, tag_name, attrs):
+        """Replaces class="foo bar" with class=["foo", "bar"]
+
+        Modifies its input in place.
+        """
+        if self.cdata_list_attributes:
+            universal = self.cdata_list_attributes.get('*', [])
+            tag_specific = self.cdata_list_attributes.get(
+                tag_name.lower(), [])
+            for cdata_list_attr in itertools.chain(universal, tag_specific):
+                if cdata_list_attr in dict(attrs):
+                    # Basically, we have a "class" attribute whose
+                    # value is a whitespace-separated list of CSS
+                    # classes. Split it into a list.
+                    value = attrs[cdata_list_attr]
+                    values = whitespace_re.split(value)
+                    attrs[cdata_list_attr] = values
+        return attrs
+
+class SAXTreeBuilder(TreeBuilder):
+    """A Beautiful Soup treebuilder that listens for SAX events."""
+
+    def feed(self, markup):
+        raise NotImplementedError()
+
+    def close(self):
+        pass
+
+    def startElement(self, name, attrs):
+        # Namespace and prefix are not tracked by this builder: pass None.
+        attrs = dict((key[1], value) for key, value in list(attrs.items()))
+        self.soup.handle_starttag(name, None, None, attrs)
+
+    def endElement(self, name):
+        #print "End %s" % name
+        self.soup.handle_endtag(name)
+
+    def startElementNS(self, nsTuple, nodeName, attrs):
+        # Throw away (ns, nodeName) for now.
+        self.startElement(nodeName, attrs)
+
+    def endElementNS(self, nsTuple, nodeName):
+        # Throw away (ns, nodeName) for now.
+        self.endElement(nodeName)
+        #handler.endElementNS((ns, node.nodeName), node.nodeName)
+
+    def startPrefixMapping(self, prefix, nodeValue):
+        # Ignore the prefix for now.
+        pass
+
+    def endPrefixMapping(self, prefix):
+        # Ignore the prefix for now.
+        # handler.endPrefixMapping(prefix)
+        pass
+
+    def characters(self, content):
+        self.soup.handle_data(content)
+
+    def startDocument(self):
+        pass
+
+    def endDocument(self):
+        pass
+
+
+class HTMLTreeBuilder(TreeBuilder):
+    """This TreeBuilder knows facts about HTML.
+
+    Such as which tags are empty-element tags.
+    """
+
+    preserve_whitespace_tags = set(['pre', 'textarea'])
+    empty_element_tags = set(['br' , 'hr', 'input', 'img', 'meta',
+                              'spacer', 'link', 'frame', 'base'])
+
+    # The HTML standard defines these attributes as containing a
+    # space-separated list of values, not a single value. That is,
+    # class="foo bar" means that the 'class' attribute has two values,
+    # 'foo' and 'bar', not the single value 'foo bar'.  When we
+    # encounter one of these attributes, we will parse its value into
+    # a list of values if possible. Upon output, the list will be
+    # converted back into a string.
+    cdata_list_attributes = {
+        "*" : ['class', 'accesskey', 'dropzone'],
+        "a" : ['rel', 'rev'],
+        "link" :  ['rel', 'rev'],
+        "td" : ["headers"],
+        "th" : ["headers"],
+        "td" : ["headers"],
+        "form" : ["accept-charset"],
+        "object" : ["archive"],
+
+        # These are HTML5 specific, as are *.accesskey and *.dropzone above.
+        "area" : ["rel"],
+        "icon" : ["sizes"],
+        "iframe" : ["sandbox"],
+        "output" : ["for"],
+        }
+
+    def set_up_substitutions(self, tag):
+        # We are only interested in <meta> tags
+        if tag.name != 'meta':
+            return False
+
+        http_equiv = tag.get('http-equiv')
+        content = tag.get('content')
+        charset = tag.get('charset')
+
+        # We are interested in <meta> tags that say what encoding the
+        # document was originally in. This means HTML 5-style <meta>
+        # tags that provide the "charset" attribute. It also means
+        # HTML 4-style <meta> tags that provide the "content"
+        # attribute and have "http-equiv" set to "content-type".
+        #
+        # In both cases we will replace the value of the appropriate
+        # attribute with a standin object that can take on any
+        # encoding.
+        meta_encoding = None
+        if charset is not None:
+            # HTML 5 style:
+            # <meta charset="utf8">
+            meta_encoding = charset
+            tag['charset'] = CharsetMetaAttributeValue(charset)
+
+        elif (content is not None and http_equiv is not None
+              and http_equiv.lower() == 'content-type'):
+            # HTML 4 style:
+            # <meta http-equiv="content-type" content="text/html; charset=utf8">
+            tag['content'] = ContentMetaAttributeValue(content)
+
+        return (meta_encoding is not None)
+
+def register_treebuilders_from(module):
+    """Copy TreeBuilders from the given module into this module."""
+    # I'm fairly sure this is not the best way to do this.
+    this_module = sys.modules['bs4.builder']
+    for name in module.__all__:
+        obj = getattr(module, name)
+
+        if issubclass(obj, TreeBuilder):
+            setattr(this_module, name, obj)
+            this_module.__all__.append(name)
+            # Register the builder while we're at it.
+            this_module.builder_registry.register(obj)
+
+# Builders are registered in reverse order of priority, so that custom
+# builder registrations will take precedence. In general, we want lxml
+# to take precedence over html5lib, because it's faster. And we only
+# want to use HTMLParser as a last resort.
+from . import _htmlparser
+register_treebuilders_from(_htmlparser)
+try:
+    from . import _html5lib
+    register_treebuilders_from(_html5lib)
+except ImportError:
+    # They don't have html5lib installed.
+    pass
+try:
+    from . import _lxml
+    register_treebuilders_from(_lxml)
+except ImportError:
+    # They don't have lxml installed.
+    pass
diff --git a/lib/bs4/builder/_html5lib.py b/lib/bs4/builder/_html5lib.py
new file mode 100644
index 00000000..6001e386
--- /dev/null
+++ b/lib/bs4/builder/_html5lib.py
@@ -0,0 +1,222 @@
+__all__ = [
+    'HTML5TreeBuilder',
+    ]
+
+import warnings
+from bs4.builder import (
+    PERMISSIVE,
+    HTML,
+    HTML_5,
+    HTMLTreeBuilder,
+    )
+from bs4.element import NamespacedAttribute
+import html5lib
+from html5lib.constants import namespaces
+from bs4.element import (
+    Comment,
+    Doctype,
+    NavigableString,
+    Tag,
+    )
+
+class HTML5TreeBuilder(HTMLTreeBuilder):
+    """Use html5lib to build a tree."""
+
+    features = ['html5lib', PERMISSIVE, HTML_5, HTML]
+
+    def prepare_markup(self, markup, user_specified_encoding):
+        # Store the user-specified encoding for use later on.
+        self.user_specified_encoding = user_specified_encoding
+        return markup, None, None, False
+
+    # These methods are defined by Beautiful Soup.
+    def feed(self, markup):
+        if self.soup.parse_only is not None:
+            warnings.warn("You provided a value for parse_only, but the html5lib tree builder doesn't support parse_only. The entire document will be parsed.")
+        parser = html5lib.HTMLParser(tree=self.create_treebuilder)
+        doc = parser.parse(markup, encoding=self.user_specified_encoding)
+
+        # Set the character encoding detected by the tokenizer.
+        if isinstance(markup, unicode):
+            # We need to special-case this because html5lib sets
+            # charEncoding to UTF-8 if it gets Unicode input.
+            doc.original_encoding = None
+        else:
+            doc.original_encoding = parser.tokenizer.stream.charEncoding[0]
+
+    def create_treebuilder(self, namespaceHTMLElements):
+        self.underlying_builder = TreeBuilderForHtml5lib(
+            self.soup, namespaceHTMLElements)
+        return self.underlying_builder
+
+    def test_fragment_to_document(self, fragment):
+        """See `TreeBuilder`."""
+        return u'<html><head></head><body>%s</body></html>' % fragment
+
+
+class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
+
+    def __init__(self, soup, namespaceHTMLElements):
+        self.soup = soup
+        super(TreeBuilderForHtml5lib, self).__init__(namespaceHTMLElements)
+
+    def documentClass(self):
+        self.soup.reset()
+        return Element(self.soup, self.soup, None)
+
+    def insertDoctype(self, token):
+        name = token["name"]
+        publicId = token["publicId"]
+        systemId = token["systemId"]
+
+        doctype = Doctype.for_name_and_ids(name, publicId, systemId)
+        self.soup.object_was_parsed(doctype)
+
+    def elementClass(self, name, namespace):
+        tag = self.soup.new_tag(name, namespace)
+        return Element(tag, self.soup, namespace)
+
+    def commentClass(self, data):
+        return TextNode(Comment(data), self.soup)
+
+    def fragmentClass(self):
+        from bs4 import BeautifulSoup  # local import: bs4 imports this module
+        self.soup = BeautifulSoup("")
+        self.soup.name = "[document_fragment]"
+        return Element(self.soup, self.soup, None)
+    def appendChild(self, node):
+        # XXX This code is not covered by the BS4 tests.
+        self.soup.append(node.element)
+
+    def getDocument(self):
+        return self.soup
+
+    def getFragment(self):
+        return html5lib.treebuilders._base.TreeBuilder.getFragment(self).element
+
+class AttrList(object):
+    """Dict-like attribute wrapper returned by Element.getAttributes()."""
+    def __init__(self, element):
+        self.element = element
+        self.attrs = dict(self.element.attrs)  # snapshot taken at creation
+    def __iter__(self):
+        return iter(list(self.attrs.items()))
+    def __setitem__(self, name, value):
+        self.element[name] = value  # written to the tag, not the snapshot
+    def items(self):
+        return list(self.attrs.items())
+    def keys(self):
+        return list(self.attrs.keys())
+    def __len__(self):
+        return len(self.attrs)
+    def __getitem__(self, name):
+        return self.attrs[name]
+    def __contains__(self, name):
+        return name in self.attrs
+
+
+class Element(html5lib.treebuilders._base.Node):
+    def __init__(self, element, soup, namespace):
+        html5lib.treebuilders._base.Node.__init__(self, element.name)
+        self.element = element
+        self.soup = soup
+        self.namespace = namespace
+
+    def appendChild(self, node):
+        if (node.element.__class__ == NavigableString and self.element.contents
+            and self.element.contents[-1].__class__ == NavigableString):
+            # Concatenate new text onto old text node
+            # XXX This has O(n^2) performance, for input like
+            # "a</a>a</a>a</a>..."
+            old_element = self.element.contents[-1]
+            new_element = self.soup.new_string(old_element + node.element)
+            old_element.replace_with(new_element)
+        else:
+            self.element.append(node.element)
+            node.parent = self
+
+    def getAttributes(self):
+        return AttrList(self.element)
+
+    def setAttributes(self, attributes):
+        if attributes is not None and len(attributes) > 0:
+
+            converted_attributes = []
+            for name, value in list(attributes.items()):
+                if isinstance(name, tuple):
+                    new_name = NamespacedAttribute(*name)
+                    del attributes[name]
+                    attributes[new_name] = value
+
+            self.soup.builder._replace_cdata_list_attribute_values(
+                self.name, attributes)
+            for name, value in attributes.items():
+                self.element[name] = value
+
+            # The attributes may contain variables that need substitution.
+            # Call set_up_substitutions manually.
+            #
+            # The Tag constructor called this method when the Tag was created,
+            # but we just set/changed the attributes, so call it again.
+            self.soup.builder.set_up_substitutions(self.element)
+    attributes = property(getAttributes, setAttributes)
+
+    def insertText(self, data, insertBefore=None):
+        text = TextNode(self.soup.new_string(data), self.soup)
+        if insertBefore:
+            self.insertBefore(text, insertBefore)
+        else:
+            self.appendChild(text)
+
+    def insertBefore(self, node, refNode):
+        index = self.element.index(refNode.element)
+        if (node.element.__class__ == NavigableString and self.element.contents
+            and self.element.contents[index-1].__class__ == NavigableString):
+            # (See comments in appendChild)
+            old_node = self.element.contents[index-1]
+            new_str = self.soup.new_string(old_node + node.element)
+            old_node.replace_with(new_str)
+        else:
+            self.element.insert(index, node.element)
+            node.parent = self
+
+    def removeChild(self, node):
+        node.element.extract()
+
+    def reparentChildren(self, newParent):
+        while self.element.contents:
+            child = self.element.contents[0]
+            child.extract()
+            if isinstance(child, Tag):
+                newParent.appendChild(
+                    Element(child, self.soup, namespaces["html"]))
+            else:
+                newParent.appendChild(
+                    TextNode(child, self.soup))
+
+    def cloneNode(self):
+        tag = self.soup.new_tag(self.element.name, self.namespace)
+        node = Element(tag, self.soup, self.namespace)
+        for key,value in self.attributes:
+            node.attributes[key] = value
+        return node
+
+    def hasContent(self):
+        return self.element.contents
+
+    def getNameTuple(self):
+        if self.namespace == None:
+            return namespaces["html"], self.name
+        else:
+            return self.namespace, self.name
+
+    nameTuple = property(getNameTuple)
+
+class TextNode(Element):
+    def __init__(self, element, soup):
+        html5lib.treebuilders._base.Node.__init__(self, None)
+        self.element = element
+        self.soup = soup
+
+    def cloneNode(self):
+        raise NotImplementedError
diff --git a/lib/bs4/builder/_htmlparser.py b/lib/bs4/builder/_htmlparser.py
new file mode 100644
index 00000000..ede5cecb
--- /dev/null
+++ b/lib/bs4/builder/_htmlparser.py
@@ -0,0 +1,244 @@
+"""Use the HTMLParser library to parse HTML files that aren't too bad."""
+
+__all__ = [
+    'HTMLParserTreeBuilder',
+    ]
+
+from HTMLParser import (
+    HTMLParser,
+    HTMLParseError,
+    )
+import sys
+import warnings
+
+# Starting in Python 3.2, the HTMLParser constructor takes a 'strict'
+# argument, which we'd like to set to False. Unfortunately,
+# http://bugs.python.org/issue13273 makes strict=True a better bet
+# before Python 3.2.3.
+#
+# At the end of this file, we monkeypatch HTMLParser so that
+# strict=True works well on Python 3.2.2.
+major, minor, release = sys.version_info[:3]
+CONSTRUCTOR_TAKES_STRICT = (
+    major > 3
+    or (major == 3 and minor > 2)
+    or (major == 3 and minor == 2 and release >= 3))
+
+from bs4.element import (
+    CData,
+    Comment,
+    Declaration,
+    Doctype,
+    ProcessingInstruction,
+    )
+from bs4.dammit import EntitySubstitution, UnicodeDammit
+
+from bs4.builder import (
+    HTML,
+    HTMLTreeBuilder,
+    STRICT,
+    )
+
+
+HTMLPARSER = 'html.parser'
+
+class BeautifulSoupHTMLParser(HTMLParser):
+    def handle_starttag(self, name, attrs):
+        # XXX namespace
+        self.soup.handle_starttag(name, None, None, dict(attrs))
+
+    def handle_endtag(self, name):
+        self.soup.handle_endtag(name)
+
+    def handle_data(self, data):
+        self.soup.handle_data(data)
+
+    def handle_charref(self, name):
+        """Emit the character for a numeric reference such as &#65; or &#x41;."""
+        # XXX workaround for a bug in HTMLParser. Remove this once it's fixed.
+        # The hex marker can be 'x' or 'X'; int('X41') would raise ValueError.
+        if name[:1] in ('x', 'X'):
+            real_name = int(name[1:], 16)
+        else:
+            real_name = int(name)
+        try:
+            data = unichr(real_name)
+        except (ValueError, OverflowError):
+            # Code point outside the range unichr() accepts.
+            data = u"\N{REPLACEMENT CHARACTER}"
+        self.handle_data(data)
+
+    def handle_entityref(self, name):
+        character = EntitySubstitution.HTML_ENTITY_TO_CHARACTER.get(name)
+        if character is not None:
+            data = character
+        else:
+            data = "&%s;" % name
+        self.handle_data(data)
+
+    def handle_comment(self, data):
+        self.soup.endData()
+        self.soup.handle_data(data)
+        self.soup.endData(Comment)
+
+    def handle_decl(self, data):
+        """Store a declaration as a Doctype, dropping a 'DOCTYPE ' prefix of any case."""
+        self.soup.endData()
+        prefixed = data.upper().startswith("DOCTYPE ")  # also <!doctype html>
+        self.soup.handle_data(data[len("DOCTYPE "):] if prefixed else data)
+        self.soup.endData(Doctype)
+
+    def unknown_decl(self, data):
+        if data.upper().startswith('CDATA['):
+            cls = CData
+            data = data[len('CDATA['):]
+        else:
+            cls = Declaration
+        self.soup.endData()
+        self.soup.handle_data(data)
+        self.soup.endData(cls)
+
+    def handle_pi(self, data):
+        self.soup.endData()
+        if data.endswith("?") and data.lower().startswith("xml"):
+            # "An XHTML processing instruction using the trailing '?'
+            # will cause the '?' to be included in data." - HTMLParser
+            # docs.
+            #
+            # Strip the question mark so we don't end up with two
+            # question marks.
+            data = data[:-1]
+        self.soup.handle_data(data)
+        self.soup.endData(ProcessingInstruction)
+
+
+class HTMLParserTreeBuilder(HTMLTreeBuilder):
+
+    is_xml = False
+    features = [HTML, STRICT, HTMLPARSER]
+
+    def __init__(self, *args, **kwargs):
+        if CONSTRUCTOR_TAKES_STRICT:
+            kwargs['strict'] = False
+        self.parser_args = (args, kwargs)
+
+    def prepare_markup(self, markup, user_specified_encoding=None,
+                       document_declared_encoding=None):
+        """
+        :return: A 4-tuple (markup, original encoding, encoding
+        declared within markup, whether any characters had to be
+        replaced with REPLACEMENT CHARACTER).
+        """
+        if isinstance(markup, unicode):
+            return markup, None, None, False
+
+        try_encodings = [user_specified_encoding, document_declared_encoding]
+        dammit = UnicodeDammit(markup, try_encodings, is_html=True)
+        return (dammit.markup, dammit.original_encoding,
+                dammit.declared_html_encoding,
+                dammit.contains_replacement_characters)
+
+    def feed(self, markup):
+        """Parse markup with a new parser; warn and re-raise on HTMLParseError."""
+        args, kwargs = self.parser_args
+        parser = BeautifulSoupHTMLParser(*args, **kwargs)
+        parser.soup = self.soup
+        try:
+            parser.feed(markup)
+        except HTMLParseError:
+            warnings.warn(RuntimeWarning("Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
+            raise  # bare raise keeps the original traceback; 'raise e' would reset it
+
+# Patch 3.2 versions of HTMLParser earlier than 3.2.3 to use some
+# 3.2.3 code. This ensures they don't treat markup like <p></p> as a
+# string.
+#
+# XXX This code can be removed once most Python 3 users are on 3.2.3.
+if major == 3 and minor == 2 and not CONSTRUCTOR_TAKES_STRICT:
+    import re
+    attrfind_tolerant = re.compile(
+        r'\s*((?<=[\'"\s])[^\s/>][^\s/=>]*)(\s*=+\s*'
+        r'(\'[^\']*\'|"[^"]*"|(?![\'"])[^>\s]*))?')
+    HTMLParserTreeBuilder.attrfind_tolerant = attrfind_tolerant
+
+    locatestarttagend = re.compile(r"""
+  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
+  (?:\s+                             # whitespace before attribute name
+    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
+      (?:\s*=\s*                     # value indicator
+        (?:'[^']*'                   # LITA-enclosed value
+          |\"[^\"]*\"                # LIT-enclosed value
+          |[^'\">\s]+                # bare value
+         )
+       )?
+     )
+   )*
+  \s*                                # trailing whitespace
+""", re.VERBOSE)
+    BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
+
+    from html.parser import tagfind, attrfind
+
+    def parse_starttag(self, i):
+        self.__starttag_text = None
+        endpos = self.check_for_whole_start_tag(i)
+        if endpos < 0:
+            return endpos
+        rawdata = self.rawdata
+        self.__starttag_text = rawdata[i:endpos]
+
+        # Now parse the data between i+1 and j into a tag and attrs
+        attrs = []
+        match = tagfind.match(rawdata, i+1)
+        assert match, 'unexpected call to parse_starttag()'
+        k = match.end()
+        self.lasttag = tag = rawdata[i+1:k].lower()
+        while k < endpos:
+            if self.strict:
+                m = attrfind.match(rawdata, k)
+            else:
+                m = attrfind_tolerant.match(rawdata, k)
+            if not m:
+                break
+            attrname, rest, attrvalue = m.group(1, 2, 3)
+            if not rest:
+                attrvalue = None
+            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
+                 attrvalue[:1] == '"' == attrvalue[-1:]:
+                attrvalue = attrvalue[1:-1]
+            if attrvalue:
+                attrvalue = self.unescape(attrvalue)
+            attrs.append((attrname.lower(), attrvalue))
+            k = m.end()
+
+        end = rawdata[k:endpos].strip()
+        if end not in (">", "/>"):
+            lineno, offset = self.getpos()
+            if "\n" in self.__starttag_text:
+                lineno = lineno + self.__starttag_text.count("\n")
+                offset = len(self.__starttag_text) \
+                         - self.__starttag_text.rfind("\n")
+            else:
+                offset = offset + len(self.__starttag_text)
+            if self.strict:
+                self.error("junk characters in start tag: %r"
+                           % (rawdata[k:endpos][:20],))
+            self.handle_data(rawdata[i:endpos])
+            return endpos
+        if end.endswith('/>'):
+            # XHTML-style empty tag: <span attr="value" />
+            self.handle_startendtag(tag, attrs)
+        else:
+            self.handle_starttag(tag, attrs)
+            if tag in self.CDATA_CONTENT_ELEMENTS:
+                self.set_cdata_mode(tag)
+        return endpos
+
+    def set_cdata_mode(self, elem):
+        self.cdata_elem = elem.lower()
+        self.interesting = re.compile(r'</\s*%s\s*>' % self.cdata_elem, re.I)
+
+    BeautifulSoupHTMLParser.parse_starttag = parse_starttag
+    BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
+
+    CONSTRUCTOR_TAKES_STRICT = True
diff --git a/lib/bs4/builder/_lxml.py b/lib/bs4/builder/_lxml.py
new file mode 100644
index 00000000..c78fdff6
--- /dev/null
+++ b/lib/bs4/builder/_lxml.py
@@ -0,0 +1,179 @@
+__all__ = [
+    'LXMLTreeBuilderForXML',
+    'LXMLTreeBuilder',
+    ]
+
+from StringIO import StringIO
+import collections
+from lxml import etree
+from bs4.element import Comment, Doctype, NamespacedAttribute
+from bs4.builder import (
+    FAST,
+    HTML,
+    HTMLTreeBuilder,
+    PERMISSIVE,
+    TreeBuilder,
+    XML)
+from bs4.dammit import UnicodeDammit
+
+LXML = 'lxml'
+
+class LXMLTreeBuilderForXML(TreeBuilder):
+    DEFAULT_PARSER_CLASS = etree.XMLParser
+
+    is_xml = True
+
+    # Well, it's permissive by XML parser standards.
+    features = [LXML, XML, FAST, PERMISSIVE]
+
+    CHUNK_SIZE = 512
+
+    @property
+    def default_parser(self):
+        # This can either return a parser object or a class, which
+        # will be instantiated with default arguments.
+        return etree.XMLParser(target=self, strip_cdata=False, recover=True)
+
+    def __init__(self, parser=None, empty_element_tags=None):
+        if empty_element_tags is not None:
+            self.empty_element_tags = set(empty_element_tags)
+        if parser is None:
+            # Use the default parser.
+            parser = self.default_parser
+        if isinstance(parser, collections.Callable):
+            # Instantiate the parser with default arguments
+            parser = parser(target=self, strip_cdata=False)
+        self.parser = parser
+        self.soup = None
+        self.nsmaps = None
+
+    def _getNsTag(self, tag):
+        # Split the namespace URL out of a fully-qualified lxml tag
+        # name. Copied from lxml's src/lxml/sax.py.
+        if tag[0] == '{':
+            return tuple(tag[1:].split('}', 1))
+        else:
+            return (None, tag)
+
+    def prepare_markup(self, markup, user_specified_encoding=None,
+                       document_declared_encoding=None):
+        """
+        :return: A 4-tuple (markup, original encoding, encoding declared within
+        markup, whether any characters were replaced with REPLACEMENT CHARACTER).
+        """
+        if isinstance(markup, unicode):
+            return markup, None, None, False
+
+        try_encodings = [user_specified_encoding, document_declared_encoding]
+        dammit = UnicodeDammit(markup, try_encodings, is_html=True)
+        return (dammit.markup, dammit.original_encoding,
+                dammit.declared_html_encoding,
+                dammit.contains_replacement_characters)
+
+    def feed(self, markup):
+        if isinstance(markup, basestring):
+            markup = StringIO(markup)
+        # Call feed() at least once, even if the markup is empty,
+        # or the parser won't be initialized.
+        data = markup.read(self.CHUNK_SIZE)
+        self.parser.feed(data)
+        while data != '':
+            # Now call feed() on the rest of the data, chunk by chunk.
+            data = markup.read(self.CHUNK_SIZE)
+            if data != '':
+                self.parser.feed(data)
+        self.parser.close()
+
+    def close(self):
+        self.nsmaps = None
+
+    def start(self, name, attrs, nsmap={}):
+        # Make sure attrs is a mutable dict--lxml may send an immutable dictproxy.
+        attrs = dict(attrs)
+
+        nsprefix = None
+        # Invert each namespace map as it comes in.
+        if len(nsmap) == 0 and self.nsmaps != None:
+            # There are no new namespaces for this tag, but namespaces
+            # are in play, so we need a separate tag stack to know
+            # when they end.
+            self.nsmaps.append(None)
+        elif len(nsmap) > 0:
+            # A new namespace mapping has come into play.
+            if self.nsmaps is None:
+                self.nsmaps = []
+            inverted_nsmap = dict((value, key) for key, value in nsmap.items())
+            self.nsmaps.append(inverted_nsmap)
+            # Also treat the namespace mapping as a set of attributes on the
+            # tag, so we can recreate it later.
+            attrs = attrs.copy()
+            for prefix, namespace in nsmap.items():
+                attribute = NamespacedAttribute(
+                    "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
+                attrs[attribute] = namespace
+        namespace, name = self._getNsTag(name)
+        if namespace is not None:
+            for inverted_nsmap in reversed(self.nsmaps):
+                if inverted_nsmap is not None and namespace in inverted_nsmap:
+                    nsprefix = inverted_nsmap[namespace]
+                    break
+        self.soup.handle_starttag(name, namespace, nsprefix, attrs)
+
+    def end(self, name):
+        self.soup.endData()
+        completed_tag = self.soup.tagStack[-1]
+        namespace, name = self._getNsTag(name)
+        nsprefix = None
+        if namespace is not None:
+            for inverted_nsmap in reversed(self.nsmaps):
+                if inverted_nsmap is not None and namespace in inverted_nsmap:
+                    nsprefix = inverted_nsmap[namespace]
+                    break
+        self.soup.handle_endtag(name, nsprefix)
+        if self.nsmaps != None:
+            # This tag, or one of its parents, introduced a namespace
+            # mapping, so pop it off the stack.
+            self.nsmaps.pop()
+            if len(self.nsmaps) == 0:
+                # Namespaces are no longer in play, so don't bother keeping
+                # track of the namespace stack.
+                self.nsmaps = None
+
+    def pi(self, target, data):
+        pass
+
+    def data(self, content):
+        self.soup.handle_data(content)
+
+    def doctype(self, name, pubid, system):
+        self.soup.endData()
+        doctype = Doctype.for_name_and_ids(name, pubid, system)
+        self.soup.object_was_parsed(doctype)
+
+    def comment(self, content):
+        "Handle comments as Comment objects."
+        self.soup.endData()
+        self.soup.handle_data(content)
+        self.soup.endData(Comment)
+
+    def test_fragment_to_document(self, fragment):
+        """See `TreeBuilder`."""
+        return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
+
+
+class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
+
+    features = [LXML, HTML, FAST, PERMISSIVE]
+    is_xml = False
+
+    @property
+    def default_parser(self):
+        return etree.HTMLParser
+
+    def feed(self, markup):
+        self.parser.feed(markup)
+        self.parser.close()
+
+    def test_fragment_to_document(self, fragment):
+        """See `TreeBuilder`."""
+        return u'<html><body>%s</body></html>' % fragment
diff --git a/lib/bs4/dammit.py b/lib/bs4/dammit.py
new file mode 100644
index 00000000..58cad9ba
--- /dev/null
+++ b/lib/bs4/dammit.py
@@ -0,0 +1,792 @@
+# -*- coding: utf-8 -*-
+"""Beautiful Soup bonus library: Unicode, Dammit
+
+This class forces XML data into a standard format (usually to UTF-8 or
+Unicode).  It is heavily based on code from Mark Pilgrim's Universal
+Feed Parser. It does not rewrite the XML or HTML to reflect a new
+encoding; that's the tree builder's job.
+"""
+
+import codecs
+from htmlentitydefs import codepoint2name
+import re
+import warnings
+
+# Autodetects character encodings. Very useful.
+# Download from http://chardet.feedparser.org/
+#  or 'apt-get install python-chardet'
+#  or 'easy_install chardet'
+try:
+    import chardet
+    #import chardet.constants
+    #chardet.constants._debug = 1
+except ImportError:
+    chardet = None
+
+# Available from http://cjkpython.i18n.org/.
+try:
+    import iconv_codec
+except ImportError:
+    pass
+
+xml_encoding_re = re.compile(
+    '^<\?.*encoding=[\'"](.*?)[\'"].*\?>'.encode(), re.I)
+html_meta_re = re.compile(
+    '<\s*meta[^>]+charset\s*=\s*["\']?([^>]*?)[ /;\'">]'.encode(), re.I)
+
+class EntitySubstitution(object):
+
+    """Substitute XML or HTML entities for the corresponding characters."""
+
+    def _populate_class_variables():
+        lookup = {}
+        reverse_lookup = {}
+        characters_for_re = []
+        for codepoint, name in list(codepoint2name.items()):
+            character = unichr(codepoint)
+            if codepoint != 34:
+                # There's no point in turning the quotation mark into
+                # &quot;, unless it happens within an attribute value, which
+                # is handled elsewhere.
+                characters_for_re.append(character)
+                lookup[character] = name
+            # But we do want to turn &quot; into the quotation mark.
+            reverse_lookup[name] = character
+        re_definition = "[%s]" % "".join(characters_for_re)
+        return lookup, reverse_lookup, re.compile(re_definition)
+    (CHARACTER_TO_HTML_ENTITY, HTML_ENTITY_TO_CHARACTER,
+     CHARACTER_TO_HTML_ENTITY_RE) = _populate_class_variables()
+
+    CHARACTER_TO_XML_ENTITY = {
+        "'": "apos",
+        '"': "quot",
+        "&": "amp",
+        "<": "lt",
+        ">": "gt",
+        }
+
+    BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
+                                           "&(?!#\d+;|#x[0-9a-fA-F]+;|\w+;)"
+                                           ")")
+
+    @classmethod
+    def _substitute_html_entity(cls, matchobj):
+        entity = cls.CHARACTER_TO_HTML_ENTITY.get(matchobj.group(0))
+        return "&%s;" % entity
+
+    @classmethod
+    def _substitute_xml_entity(cls, matchobj):
+        """Used with a regular expression to substitute the
+        appropriate XML entity for an XML special character."""
+        entity = cls.CHARACTER_TO_XML_ENTITY[matchobj.group(0)]
+        return "&%s;" % entity
+
+    @classmethod
+    def quoted_attribute_value(self, value):  # classmethod: 'self' is the class
+        """Make a value into a quoted XML attribute, possibly escaping it.
+
+         Most strings will be quoted using double quotes.
+
+          Bob's Bar -> "Bob's Bar"
+
+         If a string contains double quotes, it will be quoted using
+         single quotes.
+
+          Welcome to "my bar" -> 'Welcome to "my bar"'
+
+         If a string contains both single and double quotes, the
+         double quotes will be escaped, and the string will be quoted
+         using double quotes.
+
+          Welcome to "Bob's Bar" -> "Welcome to &quot;Bob's Bar&quot;"
+        """
+        quote_with = '"'
+        if '"' in value:
+            if "'" in value:
+                # The string contains both single and double
+                # quotes.  Turn the double quotes into
+                # entities. We quote the double quotes rather than
+                # the single quotes because the entity name is
+                # "&quot;" whether this is HTML or XML.  If we
+                # quoted the single quotes, we'd have to decide
+                # between &apos; and &squot;.
+                replace_with = "&quot;"
+                value = value.replace('"', replace_with)
+            else:
+                # There are double quotes but no single quotes.
+                # We can use single quotes to quote the attribute.
+                quote_with = "'"
+        return quote_with + value + quote_with
+
+    @classmethod
+    def substitute_xml(cls, value, make_quoted_attribute=False):
+        """Substitute XML entities for special XML characters.
+
+        :param value: A string to be substituted. The less-than sign will
+          become &lt;, the greater-than sign will become &gt;, and any
+          ampersands that are not part of an entity definition will
+          become &amp;.
+
+        :param make_quoted_attribute: If True, then the string will be
+         quoted, as befits an attribute value.
+        """
+        # Escape angle brackets, and ampersands that aren't part of
+        # entities.
+        value = cls.BARE_AMPERSAND_OR_BRACKET.sub(
+            cls._substitute_xml_entity, value)
+
+        if make_quoted_attribute:
+            value = cls.quoted_attribute_value(value)
+        return value
+
+    @classmethod
+    def substitute_html(cls, s):
+        """Replace certain Unicode characters with named HTML entities.
+
+        This differs from data.encode(encoding, 'xmlcharrefreplace')
+        in that the goal is to make the result more readable (to those
+        with ASCII displays) rather than to recover from
+        errors. There's absolutely nothing wrong with a UTF-8 string
+        containing a LATIN SMALL LETTER E WITH ACUTE, but replacing that
+        character with "&eacute;" will make it more readable to some
+        people.
+        """
+        return cls.CHARACTER_TO_HTML_ENTITY_RE.sub(
+            cls._substitute_html_entity, s)
+
+
+class UnicodeDammit:
+    """A class for detecting the encoding of a *ML document and
+    converting it to a Unicode string. If the source encoding is
+    windows-1252, can replace MS smart quotes with their HTML or XML
+    equivalents."""
+
+    # This dictionary maps commonly seen values for "charset" in HTML
+    # meta tags to the corresponding Python codec names. It only covers
+    # values that aren't in Python's aliases and can't be determined
+    # by the heuristics in find_codec.
+    CHARSET_ALIASES = {"macintosh": "mac-roman",
+                       "x-sjis": "shift-jis"}
+
+    ENCODINGS_WITH_SMART_QUOTES = [
+        "windows-1252",
+        "iso-8859-1",
+        "iso-8859-2",
+        ]
+
+    def __init__(self, markup, override_encodings=[],
+                 smart_quotes_to=None, is_html=False):
+        self.declared_html_encoding = None
+        self.smart_quotes_to = smart_quotes_to
+        self.tried_encodings = []
+        self.contains_replacement_characters = False
+
+        if markup == '' or isinstance(markup, unicode):
+            self.markup = markup
+            self.unicode_markup = unicode(markup)
+            self.original_encoding = None
+            return
+
+        new_markup, document_encoding, sniffed_encoding = \
+            self._detectEncoding(markup, is_html)
+        self.markup = new_markup
+
+        u = None
+        if new_markup != markup:
+            # _detectEncoding modified the markup, then converted it to
+            # Unicode and then to UTF-8. So convert it from UTF-8.
+            u = self._convert_from("utf8")
+            self.original_encoding = sniffed_encoding
+
+        if not u:
+            for proposed_encoding in (
+                override_encodings + [document_encoding, sniffed_encoding]):
+                if proposed_encoding is not None:
+                    u = self._convert_from(proposed_encoding)
+                    if u:
+                        break
+
+        # If no luck and we have auto-detection library, try that:
+        if not u and chardet and not isinstance(self.markup, unicode):
+            u = self._convert_from(chardet.detect(self.markup)['encoding'])
+
+        # As a last resort, try utf-8 and windows-1252:
+        if not u:
+            for proposed_encoding in ("utf-8", "windows-1252"):
+                u = self._convert_from(proposed_encoding)
+                if u:
+                    break
+
+        # As an absolute last resort, try the encodings again with
+        # character replacement.
+        if not u:
+            for proposed_encoding in (
+                override_encodings + [
+                    document_encoding, sniffed_encoding, "utf-8", "windows-1252"]):
+                if proposed_encoding != "ascii":
+                    u = self._convert_from(proposed_encoding, "replace")
+                if u is not None:
+                    warnings.warn(
+                        UnicodeWarning(
+                            "Some characters could not be decoded, and were "
+                            "replaced with REPLACEMENT CHARACTER."))
+                    self.contains_replacement_characters = True
+                    break
+
+        # We could at this point force it to ASCII, but that would
+        # destroy so much data that I think giving up is better
+        self.unicode_markup = u
+        if not u:
+            self.original_encoding = None
+
+    def _sub_ms_char(self, match):
+        """Changes a MS smart quote character to an XML or HTML
+        entity, or an ASCII character."""
+        orig = match.group(1)
+        if self.smart_quotes_to == 'ascii':
+            sub = self.MS_CHARS_TO_ASCII.get(orig).encode()
+        else:
+            sub = self.MS_CHARS.get(orig)
+            if type(sub) == tuple:
+                if self.smart_quotes_to == 'xml':
+                    sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
+                else:
+                    sub = '&'.encode() + sub[0].encode() + ';'.encode()
+            else:
+                sub = sub.encode()
+        return sub
+
+    def _convert_from(self, proposed, errors="strict"):
+        proposed = self.find_codec(proposed)
+        if not proposed or (proposed, errors) in self.tried_encodings:
+            return None
+        self.tried_encodings.append((proposed, errors))
+        markup = self.markup
+
+        # Convert smart quotes to HTML if coming from an encoding
+        # that might have them.
+        if (self.smart_quotes_to is not None
+            and proposed.lower() in self.ENCODINGS_WITH_SMART_QUOTES):
+            smart_quotes_re = b"([\x80-\x9f])"
+            smart_quotes_compiled = re.compile(smart_quotes_re)
+            markup = smart_quotes_compiled.sub(self._sub_ms_char, markup)
+
+        try:
+            #print "Trying to convert document to %s (errors=%s)" % (
+            #    proposed, errors)
+            u = self._to_unicode(markup, proposed, errors)
+            self.markup = u
+            self.original_encoding = proposed
+        except Exception as e:
+            #print "That didn't work!"
+            #print e
+            return None
+        #print "Correct encoding: %s" % proposed
+        return self.markup
+
+    def _to_unicode(self, data, encoding, errors="strict"):
+        '''Given a string and its encoding, decodes the string into Unicode.
+        %encoding is a string recognized by encodings.aliases'''
+
+        # strip Byte Order Mark (if present)
+        if (len(data) >= 4) and (data[:2] == '\xfe\xff') \
+               and (data[2:4] != '\x00\x00'):
+            encoding = 'utf-16be'
+            data = data[2:]
+        elif (len(data) >= 4) and (data[:2] == '\xff\xfe') \
+                 and (data[2:4] != '\x00\x00'):
+            encoding = 'utf-16le'
+            data = data[2:]
+        elif data[:3] == '\xef\xbb\xbf':
+            encoding = 'utf-8'
+            data = data[3:]
+        elif data[:4] == '\x00\x00\xfe\xff':
+            encoding = 'utf-32be'
+            data = data[4:]
+        elif data[:4] == '\xff\xfe\x00\x00':
+            encoding = 'utf-32le'
+            data = data[4:]
+        newdata = unicode(data, encoding, errors)
+        return newdata
+
+    def _detectEncoding(self, xml_data, is_html=False):
+        """Return (data, declared encoding, BOM/byte-sniffed encoding) for a document."""
+        xml_encoding = sniffed_xml_encoding = None
+        try:
+            if xml_data[:4] == b'\x4c\x6f\xa7\x94':
+                # EBCDIC
+                xml_data = self._ebcdic_to_ascii(xml_data)
+            elif xml_data[:4] == b'\x00\x3c\x00\x3f':
+                # UTF-16BE
+                sniffed_xml_encoding = 'utf-16be'
+                xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
+            elif (len(xml_data) >= 4) and (xml_data[:2] == b'\xfe\xff') \
+                     and (xml_data[2:4] != b'\x00\x00'):
+                # UTF-16BE with BOM
+                sniffed_xml_encoding = 'utf-16be'
+                xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
+            elif xml_data[:4] == b'\x3c\x00\x3f\x00':
+                # UTF-16LE
+                sniffed_xml_encoding = 'utf-16le'
+                xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
+            elif (len(xml_data) >= 4) and (xml_data[:2] == b'\xff\xfe') and \
+                     (xml_data[2:4] != b'\x00\x00'):
+                # UTF-16LE with BOM
+                sniffed_xml_encoding = 'utf-16le'
+                xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
+            elif xml_data[:4] == b'\x00\x00\x00\x3c':
+                # UTF-32BE
+                sniffed_xml_encoding = 'utf-32be'
+                xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
+            elif xml_data[:4] == b'\x3c\x00\x00\x00':
+                # UTF-32LE
+                sniffed_xml_encoding = 'utf-32le'
+                xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
+            elif xml_data[:4] == b'\x00\x00\xfe\xff':
+                # UTF-32BE with BOM
+                sniffed_xml_encoding = 'utf-32be'
+                xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
+            elif xml_data[:4] == b'\xff\xfe\x00\x00':
+                # UTF-32LE with BOM
+                sniffed_xml_encoding = 'utf-32le'
+                xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
+            elif xml_data[:3] == b'\xef\xbb\xbf':
+                # UTF-8 with BOM
+                sniffed_xml_encoding = 'utf-8'
+                xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
+            else:
+                sniffed_xml_encoding = 'ascii'
+        except Exception:
+            # Best effort: a failed transcode leaves xml_data as it was received.
+            pass
+        xml_encoding_match = xml_encoding_re.match(xml_data)
+        if not xml_encoding_match and is_html:
+            xml_encoding_match = html_meta_re.search(xml_data)
+        if xml_encoding_match is not None:
+            xml_encoding = xml_encoding_match.groups()[0].decode(
+                'ascii').lower()
+            if is_html:
+                self.declared_html_encoding = xml_encoding
+            if sniffed_xml_encoding and \
+               (xml_encoding in ('iso-10646-ucs-2', 'ucs-2', 'csunicode',
+                                 'iso-10646-ucs-4', 'ucs-4', 'csucs4',
+                                 'utf-16', 'utf-32', 'utf_16', 'utf_32',
+                                 'utf16', 'u16')):
+                xml_encoding = sniffed_xml_encoding
+        return xml_data, xml_encoding, sniffed_xml_encoding
+
+    def find_codec(self, charset):
+        return self._codec(self.CHARSET_ALIASES.get(charset, charset)) \
+               or (charset and self._codec(charset.replace("-", ""))) \
+               or (charset and self._codec(charset.replace("-", "_"))) \
+               or charset
+
+    def _codec(self, charset):
+        if not charset:
+            return charset
+        codec = None
+        try:
+            codecs.lookup(charset)
+            codec = charset
+        except (LookupError, ValueError):
+            pass
+        return codec
+
+    EBCDIC_TO_ASCII_MAP = None
+
+    def _ebcdic_to_ascii(self, s):
+        c = self.__class__  # the table is cached on the class, not the instance
+        if not c.EBCDIC_TO_ASCII_MAP:  # build the 256-entry table on first use
+            emap = (0,1,2,3,156,9,134,127,151,141,142,11,12,13,14,15,
+                    16,17,18,19,157,133,8,135,24,25,146,143,28,29,30,31,
+                    128,129,130,131,132,10,23,27,136,137,138,139,140,5,6,7,
+                    144,145,22,147,148,149,150,4,152,153,154,155,20,21,158,26,
+                    32,160,161,162,163,164,165,166,167,168,91,46,60,40,43,33,
+                    38,169,170,171,172,173,174,175,176,177,93,36,42,41,59,94,
+                    45,47,178,179,180,181,182,183,184,185,124,44,37,95,62,63,
+                    186,187,188,189,190,191,192,193,194,96,58,35,64,39,61,34,
+                    195,97,98,99,100,101,102,103,104,105,196,197,198,199,200,
+                    201,202,106,107,108,109,110,111,112,113,114,203,204,205,
+                    206,207,208,209,126,115,116,117,118,119,120,121,122,210,
+                    211,212,213,214,215,216,217,218,219,220,221,222,223,224,
+                    225,226,227,228,229,230,231,123,65,66,67,68,69,70,71,72,
+                    73,232,233,234,235,236,237,125,74,75,76,77,78,79,80,81,
+                    82,238,239,240,241,242,243,92,159,83,84,85,86,87,88,89,
+                    90,244,245,246,247,248,249,48,49,50,51,52,53,54,55,56,57,
+                    250,251,252,253,254,255)
+            import string
+            c.EBCDIC_TO_ASCII_MAP = string.maketrans(  # byte i -> chr(emap[i])
+            ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
+        return s.translate(c.EBCDIC_TO_ASCII_MAP)  # remap every byte of s
+
+    # Maps bytes \x80-\x9f to (HTML entity name, hex code point) or a literal.
+    MS_CHARS = {b'\x80': ('euro', '20AC'),
+                b'\x81': ' ',
+                b'\x82': ('sbquo', '201A'),
+                b'\x83': ('fnof', '192'),
+                b'\x84': ('bdquo', '201E'),
+                b'\x85': ('hellip', '2026'),
+                b'\x86': ('dagger', '2020'),
+                b'\x87': ('Dagger', '2021'),
+                b'\x88': ('circ', '2C6'),
+                b'\x89': ('permil', '2030'),
+                b'\x8A': ('Scaron', '160'),
+                b'\x8B': ('lsaquo', '2039'),
+                b'\x8C': ('OElig', '152'),
+                b'\x8D': '?',
+                b'\x8E': ('#x17D', '17D'),
+                b'\x8F': '?',
+                b'\x90': '?',
+                b'\x91': ('lsquo', '2018'),
+                b'\x92': ('rsquo', '2019'),
+                b'\x93': ('ldquo', '201C'),
+                b'\x94': ('rdquo', '201D'),
+                b'\x95': ('bull', '2022'),
+                b'\x96': ('ndash', '2013'),
+                b'\x97': ('mdash', '2014'),
+                b'\x98': ('tilde', '2DC'),
+                b'\x99': ('trade', '2122'),
+                b'\x9a': ('scaron', '161'),
+                b'\x9b': ('rsaquo', '203A'),
+                b'\x9c': ('oelig', '153'),
+                b'\x9d': '?',
+                b'\x9e': ('#x17E', '17E'),
+                b'\x9f': ('Yuml', '178'),}  # U+0178; was '' (gave "&#x;")
+
+    # A parochial partial mapping of ISO-Latin-1 to ASCII. Contains
+    # horrors like stripping diacritical marks to turn á into a, but also
+    # contains non-horrors like turning “ into ".
+    MS_CHARS_TO_ASCII = {
+        b'\x80' : 'EUR',
+        b'\x81' : ' ',
+        b'\x82' : ',',
+        b'\x83' : 'f',
+        b'\x84' : ',,',
+        b'\x85' : '...',
+        b'\x86' : '+',
+        b'\x87' : '++',
+        b'\x88' : '^',
+        b'\x89' : '%',
+        b'\x8a' : 'S',
+        b'\x8b' : '<',
+        b'\x8c' : 'OE',
+        b'\x8d' : '?',
+        b'\x8e' : 'Z',
+        b'\x8f' : '?',
+        b'\x90' : '?',
+        b'\x91' : "'",
+        b'\x92' : "'",
+        b'\x93' : '"',
+        b'\x94' : '"',
+        b'\x95' : '*',
+        b'\x96' : '-',
+        b'\x97' : '--',
+        b'\x98' : '~',
+        b'\x99' : '(TM)',
+        b'\x9a' : 's',
+        b'\x9b' : '>',
+        b'\x9c' : 'oe',
+        b'\x9d' : '?',
+        b'\x9e' : 'z',
+        b'\x9f' : 'Y',
+        b'\xa0' : ' ',
+        b'\xa1' : '!',
+        b'\xa2' : 'c',
+        b'\xa3' : 'GBP',
+        b'\xa4' : '$', #This approximation is especially parochial--this is the
+                       #generic currency symbol.
+        b'\xa5' : 'YEN',
+        b'\xa6' : '|',
+        b'\xa7' : 'S',
+        b'\xa8' : '..',
+        b'\xa9' : '',
+        b'\xaa' : '(th)',
+        b'\xab' : '<<',
+        b'\xac' : '!',
+        b'\xad' : ' ',
+        b'\xae' : '(R)',
+        b'\xaf' : '-',
+        b'\xb0' : 'o',
+        b'\xb1' : '+-',
+        b'\xb2' : '2',
+        b'\xb3' : '3',
+        b'\xb4' : ("'", 'acute'),
+        b'\xb5' : 'u',
+        b'\xb6' : 'P',
+        b'\xb7' : '*',
+        b'\xb8' : ',',
+        b'\xb9' : '1',
+        b'\xba' : '(th)',
+        b'\xbb' : '>>',
+        b'\xbc' : '1/4',
+        b'\xbd' : '1/2',
+        b'\xbe' : '3/4',
+        b'\xbf' : '?',
+        b'\xc0' : 'A',
+        b'\xc1' : 'A',
+        b'\xc2' : 'A',
+        b'\xc3' : 'A',
+        b'\xc4' : 'A',
+        b'\xc5' : 'A',
+        b'\xc6' : 'AE',
+        b'\xc7' : 'C',
+        b'\xc8' : 'E',
+        b'\xc9' : 'E',
+        b'\xca' : 'E',
+        b'\xcb' : 'E',
+        b'\xcc' : 'I',
+        b'\xcd' : 'I',
+        b'\xce' : 'I',
+        b'\xcf' : 'I',
+        b'\xd0' : 'D',
+        b'\xd1' : 'N',
+        b'\xd2' : 'O',
+        b'\xd3' : 'O',
+        b'\xd4' : 'O',
+        b'\xd5' : 'O',
+        b'\xd6' : 'O',
+        b'\xd7' : '*',
+        b'\xd8' : 'O',
+        b'\xd9' : 'U',
+        b'\xda' : 'U',
+        b'\xdb' : 'U',
+        b'\xdc' : 'U',
+        b'\xdd' : 'Y',
+        b'\xde' : 'b',
+        b'\xdf' : 'B',
+        b'\xe0' : 'a',
+        b'\xe1' : 'a',
+        b'\xe2' : 'a',
+        b'\xe3' : 'a',
+        b'\xe4' : 'a',
+        b'\xe5' : 'a',
+        b'\xe6' : 'ae',
+        b'\xe7' : 'c',
+        b'\xe8' : 'e',
+        b'\xe9' : 'e',
+        b'\xea' : 'e',
+        b'\xeb' : 'e',
+        b'\xec' : 'i',
+        b'\xed' : 'i',
+        b'\xee' : 'i',
+        b'\xef' : 'i',
+        b'\xf0' : 'o',
+        b'\xf1' : 'n',
+        b'\xf2' : 'o',
+        b'\xf3' : 'o',
+        b'\xf4' : 'o',
+        b'\xf5' : 'o',
+        b'\xf6' : 'o',
+        b'\xf7' : '/',
+        b'\xf8' : 'o',
+        b'\xf9' : 'u',
+        b'\xfa' : 'u',
+        b'\xfb' : 'u',
+        b'\xfc' : 'u',
+        b'\xfd' : 'y',
+        b'\xfe' : 'b',
+        b'\xff' : 'y',
+        }
+
+    # Maps a Windows-1252/ISO-8859-1 byte value to the UTF-8 encoding
+    # of the same character; used to repair rogue bytes in otherwise
+    # UTF-8 documents.
+    # Note that \x81, \x8d, \x8f, \x90, and \x9d are undefined in
+    # Windows-1252.
+    WINDOWS_1252_TO_UTF8 = {
+        0x80 : b'\xe2\x82\xac', # €
+        0x82 : b'\xe2\x80\x9a', # ‚
+        0x83 : b'\xc6\x92',     # ƒ
+        0x84 : b'\xe2\x80\x9e', # „
+        0x85 : b'\xe2\x80\xa6', # …
+        0x86 : b'\xe2\x80\xa0', # †
+        0x87 : b'\xe2\x80\xa1', # ‡
+        0x88 : b'\xcb\x86',     # ˆ
+        0x89 : b'\xe2\x80\xb0', # ‰
+        0x8a : b'\xc5\xa0',     # Š
+        0x8b : b'\xe2\x80\xb9', # ‹
+        0x8c : b'\xc5\x92',     # Œ
+        0x8e : b'\xc5\xbd',     # Ž
+        0x91 : b'\xe2\x80\x98', # ‘
+        0x92 : b'\xe2\x80\x99', # ’
+        0x93 : b'\xe2\x80\x9c', # “
+        0x94 : b'\xe2\x80\x9d', # ”
+        0x95 : b'\xe2\x80\xa2', # •
+        0x96 : b'\xe2\x80\x93', # –
+        0x97 : b'\xe2\x80\x94', # —
+        0x98 : b'\xcb\x9c',     # ˜
+        0x99 : b'\xe2\x84\xa2', # ™
+        0x9a : b'\xc5\xa1',     # š
+        0x9b : b'\xe2\x80\xba', # ›
+        0x9c : b'\xc5\x93',     # œ
+        0x9e : b'\xc5\xbe',     # ž
+        0x9f : b'\xc5\xb8',     # Ÿ
+        0xa0 : b'\xc2\xa0',     #  
+        0xa1 : b'\xc2\xa1',     # ¡
+        0xa2 : b'\xc2\xa2',     # ¢
+        0xa3 : b'\xc2\xa3',     # £
+        0xa4 : b'\xc2\xa4',     # ¤
+        0xa5 : b'\xc2\xa5',     # ¥
+        0xa6 : b'\xc2\xa6',     # ¦
+        0xa7 : b'\xc2\xa7',     # §
+        0xa8 : b'\xc2\xa8',     # ¨
+        0xa9 : b'\xc2\xa9',     # ©
+        0xaa : b'\xc2\xaa',     # ª
+        0xab : b'\xc2\xab',     # «
+        0xac : b'\xc2\xac',     # ¬
+        0xad : b'\xc2\xad',     # ­
+        0xae : b'\xc2\xae',     # ®
+        0xaf : b'\xc2\xaf',     # ¯
+        0xb0 : b'\xc2\xb0',     # °
+        0xb1 : b'\xc2\xb1',     # ±
+        0xb2 : b'\xc2\xb2',     # ²
+        0xb3 : b'\xc2\xb3',     # ³
+        0xb4 : b'\xc2\xb4',     # ´
+        0xb5 : b'\xc2\xb5',     # µ
+        0xb6 : b'\xc2\xb6',     # ¶
+        0xb7 : b'\xc2\xb7',     # ·
+        0xb8 : b'\xc2\xb8',     # ¸
+        0xb9 : b'\xc2\xb9',     # ¹
+        0xba : b'\xc2\xba',     # º
+        0xbb : b'\xc2\xbb',     # »
+        0xbc : b'\xc2\xbc',     # ¼
+        0xbd : b'\xc2\xbd',     # ½
+        0xbe : b'\xc2\xbe',     # ¾
+        0xbf : b'\xc2\xbf',     # ¿
+        0xc0 : b'\xc3\x80',     # À
+        0xc1 : b'\xc3\x81',     # Á
+        0xc2 : b'\xc3\x82',     # Â
+        0xc3 : b'\xc3\x83',     # Ã
+        0xc4 : b'\xc3\x84',     # Ä
+        0xc5 : b'\xc3\x85',     # Å
+        0xc6 : b'\xc3\x86',     # Æ
+        0xc7 : b'\xc3\x87',     # Ç
+        0xc8 : b'\xc3\x88',     # È
+        0xc9 : b'\xc3\x89',     # É
+        0xca : b'\xc3\x8a',     # Ê
+        0xcb : b'\xc3\x8b',     # Ë
+        0xcc : b'\xc3\x8c',     # Ì
+        0xcd : b'\xc3\x8d',     # Í
+        0xce : b'\xc3\x8e',     # Î
+        0xcf : b'\xc3\x8f',     # Ï
+        0xd0 : b'\xc3\x90',     # Ð
+        0xd1 : b'\xc3\x91',     # Ñ
+        0xd2 : b'\xc3\x92',     # Ò
+        0xd3 : b'\xc3\x93',     # Ó
+        0xd4 : b'\xc3\x94',     # Ô
+        0xd5 : b'\xc3\x95',     # Õ
+        0xd6 : b'\xc3\x96',     # Ö
+        0xd7 : b'\xc3\x97',     # ×
+        0xd8 : b'\xc3\x98',     # Ø
+        0xd9 : b'\xc3\x99',     # Ù
+        0xda : b'\xc3\x9a',     # Ú
+        0xdb : b'\xc3\x9b',     # Û
+        0xdc : b'\xc3\x9c',     # Ü
+        0xdd : b'\xc3\x9d',     # Ý
+        0xde : b'\xc3\x9e',     # Þ
+        0xdf : b'\xc3\x9f',     # ß
+        0xe0 : b'\xc3\xa0',     # à
+        0xe1 : b'\xc3\xa1',     # á
+        0xe2 : b'\xc3\xa2',     # â
+        0xe3 : b'\xc3\xa3',     # ã
+        0xe4 : b'\xc3\xa4',     # ä
+        0xe5 : b'\xc3\xa5',     # å
+        0xe6 : b'\xc3\xa6',     # æ
+        0xe7 : b'\xc3\xa7',     # ç
+        0xe8 : b'\xc3\xa8',     # è
+        0xe9 : b'\xc3\xa9',     # é
+        0xea : b'\xc3\xaa',     # ê
+        0xeb : b'\xc3\xab',     # ë
+        0xec : b'\xc3\xac',     # ì
+        0xed : b'\xc3\xad',     # í
+        0xee : b'\xc3\xae',     # î
+        0xef : b'\xc3\xaf',     # ï
+        0xf0 : b'\xc3\xb0',     # ð
+        0xf1 : b'\xc3\xb1',     # ñ
+        0xf2 : b'\xc3\xb2',     # ò
+        0xf3 : b'\xc3\xb3',     # ó
+        0xf4 : b'\xc3\xb4',     # ô
+        0xf5 : b'\xc3\xb5',     # õ
+        0xf6 : b'\xc3\xb6',     # ö
+        0xf7 : b'\xc3\xb7',     # ÷
+        0xf8 : b'\xc3\xb8',     # ø
+        0xf9 : b'\xc3\xb9',     # ù
+        0xfa : b'\xc3\xba',     # ú
+        0xfb : b'\xc3\xbb',     # û
+        0xfc : b'\xc3\xbc',     # ü
+        0xfd : b'\xc3\xbd',     # ý
+        0xfe : b'\xc3\xbe',     # þ
+        }
+
+    MULTIBYTE_MARKERS_AND_SIZES = [
+        (0xc2, 0xdf, 2), # 2-byte characters start with a byte C2-DF
+        (0xe0, 0xef, 3), # 3-byte characters start with E0-EF
+        (0xf0, 0xf4, 4), # 4-byte characters start with F0-F4
+        ]
+
+    FIRST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[0][0]
+    LAST_MULTIBYTE_MARKER = MULTIBYTE_MARKERS_AND_SIZES[-1][1]
+
+    @classmethod
+    def detwingle(cls, in_bytes, main_encoding="utf8",
+                  embedded_encoding="windows-1252"):
+        """Repair a bytestring that mixes two encodings.
+
+        Only one combination is handled: stray Windows-1252 (or its
+        subset ISO-8859-1) bytes inside a UTF-8 document.
+
+        `in_bytes` must still be a bytestring; once the document has
+        been decoded to Unicode it is too late to fix.
+
+        Returns a bytestring in which every `embedded_encoding`
+        character has been replaced by its `main_encoding` equivalent.
+        Raises NotImplementedError for any other pair of encodings.
+        """
+        normalized = embedded_encoding.replace('_', '-').lower()
+        if normalized not in ('windows-1252', 'windows_1252'):
+            raise NotImplementedError(
+                "Windows-1252 and ISO-8859-1 are the only currently supported "
+                "embedded encodings.")
+
+        if main_encoding.lower() not in ('utf8', 'utf-8'):
+            raise NotImplementedError(
+                "UTF-8 is the only currently supported main encoding.")
+
+        # Output is built from alternating untouched and translated chunks.
+        pieces = []
+        # Index just past the last byte already copied into pieces.
+        copied_upto = 0
+        cursor = 0
+        total = len(in_bytes)
+
+        while cursor < total:
+            val = in_bytes[cursor]
+            if not isinstance(val, int):
+                # Python 2.x: indexing a str yields a 1-char str.
+                val = ord(val)
+            if cls.FIRST_MULTIBYTE_MARKER <= val <= cls.LAST_MULTIBYTE_MARKER:
+                # Lead byte of a UTF-8 multibyte character: jump over
+                # the whole sequence.
+                # The matching range tells how many bytes it occupies.
+                for low, high, width in cls.MULTIBYTE_MARKERS_AND_SIZES:
+                    if low <= val <= high:
+                        cursor += width
+                        break
+            elif val >= 0x80 and val in cls.WINDOWS_1252_TO_UTF8:
+                # A Windows-1252 character. Flush the untouched bytes
+                # before it, then emit its UTF-8 translation.
+                pieces.append(in_bytes[copied_upto:cursor])
+                pieces.append(cls.WINDOWS_1252_TO_UTF8[val])
+                cursor += 1
+                copied_upto = cursor
+            else:
+                # Plain byte; move on.
+                cursor += 1
+
+        if copied_upto == 0:
+            # Nothing was translated, so hand back the input itself.
+            return in_bytes
+        # Append whatever follows the last translated character.
+        pieces.append(in_bytes[copied_upto:])
+        return b''.join(pieces)
+
diff --git a/lib/bs4/element.py b/lib/bs4/element.py
new file mode 100644
index 00000000..91a40078
--- /dev/null
+++ b/lib/bs4/element.py
@@ -0,0 +1,1347 @@
+import collections
+import re
+import sys
+import warnings
+from bs4.dammit import EntitySubstitution
+
+DEFAULT_OUTPUT_ENCODING = "utf-8"
+PY3K = (sys.version_info[0] > 2)
+
+whitespace_re = re.compile("\s+")
+
+def _alias(attr):
+    """Return a read/write property that aliases `attr` (BS3 compat)."""
+    @property
+    def alias(self):
+        return getattr(self, attr)
+
+    @alias.setter
+    def alias(self, value):
+        return setattr(self, attr, value)
+    return alias
+
+
+class NamespacedAttribute(unicode):
+    """A unicode string "prefix:name" that remembers its parts."""
+
+    def __new__(cls, prefix, name, namespace=None):
+        # With no local name, the prefix alone is the string value.
+        text = prefix if name is None else prefix + ":" + name
+        instance = unicode.__new__(cls, text)
+        instance.prefix = prefix
+        instance.name = name
+        instance.namespace = namespace
+        return instance
+
+class AttributeValueWithCharsetSubstitution(unicode):
+    """A stand-in object for a character encoding specified in HTML."""
+
+class CharsetMetaAttributeValue(AttributeValueWithCharsetSubstitution):
+    """Placeholder for the value of a <meta> tag's 'charset' attribute.
+
+    Parsing '<meta charset="utf8">' yields one of these objects as the
+    'charset' value; encode() hands back the encoding passed to it.
+    """
+
+    def __new__(cls, original_value):
+        instance = unicode.__new__(cls, original_value)
+        instance.original_value = original_value
+        return instance
+
+    def encode(self, encoding):
+        return encoding
+
+
+class ContentMetaAttributeValue(AttributeValueWithCharsetSubstitution):
+    """Placeholder for the value of a <meta> tag's 'content' attribute.
+
+    Given the markup
+     <meta http-equiv="content-type" content="text/html; charset=utf8">
+    the 'content' value becomes one of these objects, provided it
+    names a charset; otherwise a plain unicode string is used.
+    """
+
+    CHARSET_RE = re.compile("((^|;)\s*charset=)([^;]*)", re.M)
+
+    def __new__(cls, original_value):
+        if cls.CHARSET_RE.search(original_value) is None:
+            # No charset in here, so an ordinary string will do.
+            return unicode.__new__(unicode, original_value)
+
+        instance = unicode.__new__(cls, original_value)
+        instance.original_value = original_value
+        return instance
+
+    def encode(self, encoding):
+        """Return the original value with its charset set to `encoding`."""
+        # group(1) is everything up to and including "charset=".
+        return self.CHARSET_RE.sub(
+            lambda found: found.group(1) + encoding, self.original_value)
+
+
+class PageElement(object):
+    """Contains the navigational information for some part of the page
+    (either a tag or a piece of text)"""
+
+    # There are four possible kinds of value for the "formatter" argument
+    # passed in to methods like encode() and prettify():
+    #
+    # "html" - All Unicode characters with corresponding HTML entities
+    #   are converted to those entities on output.
+    # "minimal" - Bare ampersands and angle brackets are converted to
+    #   XML entities: &amp; &lt; &gt;
+    # None - The null formatter. Unicode characters are never
+    #   converted to entities.  This is not recommended, but it's
+    #   faster than "minimal".
+    # A function - This function will be called on every string that
+    #  needs to undergo entity substitution
+    FORMATTERS = {
+        "html" : EntitySubstitution.substitute_html,
+        "minimal" : EntitySubstitution.substitute_xml,
+        None : None
+        }
+
+    @classmethod
+    def format_string(self, s, formatter='minimal'):
+        """Run `s` through a formatter and return the result.
+
+        A non-callable `formatter` is looked up by name in FORMATTERS,
+        falling back to substitute_xml; None leaves `s` unchanged.
+        """
+        if not callable(formatter):
+            fallback = EntitySubstitution.substitute_xml
+            formatter = self.FORMATTERS.get(formatter, fallback)
+        return s if formatter is None else formatter(s)
+
+    def setup(self, parent=None, previous_element=None):
+        """Wire this element to its parent, the element parsed just
+        before it, and its parent's current last child (if any)."""
+        self.parent = parent
+        self.previous_element = previous_element
+        if previous_element is not None:
+            previous_element.next_element = self
+        self.next_element = self.previous_sibling = self.next_sibling = None
+        siblings = parent.contents if parent is not None else None
+        if siblings:
+            # The parent's last child becomes our previous sibling.
+            self.previous_sibling = older = siblings[-1]
+            older.next_sibling = self
+
+    nextSibling = _alias("next_sibling")  # BS3
+    previousSibling = _alias("previous_sibling")  # BS3
+
+    def replace_with(self, replace_with):
+        """Swap in `replace_with` here; return self (None if it is self)."""
+        if replace_with is self:
+            return
+        if replace_with is self.parent:
+            raise ValueError("Cannot replace a Tag with its parent.")
+        home, slot = self.parent, self.parent.index(self)
+        self.extract()
+        home.insert(slot, replace_with)
+        return self
+    replaceWith = replace_with  # BS3
+
+    def unwrap(self):
+        """Replace this element with its own children; return the element."""
+        home, slot = self.parent, self.parent.index(self)
+        self.extract()
+        for node in self.contents[::-1]:
+            home.insert(slot, node)
+        return self
+    replace_with_children = unwrap
+    replaceWithChildren = unwrap  # BS3
+
+    def wrap(self, wrap_inside):
+        """Put `wrap_inside` in this element's place, then nest it inside."""
+        wrap_inside.append(self.replace_with(wrap_inside))
+        return wrap_inside
+
+    def extract(self):
+        """Detach this element and its descendants from the tree; return it."""
+        parent = self.parent
+        if parent is not None:
+            del parent.contents[parent.index(self)]
+
+        # Splice the document-order chain: the element before this
+        # subtree must now lead straight to the element after it.
+        tail = self._last_descendant()
+        before, after = self.previous_element, tail.next_element
+        if before is not None:
+            before.next_element = after
+        if after is not None:
+            after.previous_element = before
+        self.previous_element = None
+        tail.next_element = None
+
+        self.parent = None
+        older, younger = self.previous_sibling, self.next_sibling
+        if older is not None:
+            older.next_sibling = younger
+        if younger is not None:
+            younger.previous_sibling = older
+        self.previous_sibling = self.next_sibling = None
+        return self
+
+    def _last_descendant(self):
+        """Return the deepest last child under this object, or the object."""
+        node = self
+        while hasattr(node, 'contents') and node.contents:
+            node = node.contents[-1]
+        return node
+    # BS3: Not part of the API!
+    _lastRecursiveChild = _last_descendant
+
+    def insert(self, position, new_child):  # put new_child at self.contents[position]
+        if new_child is self:
+            raise ValueError("Cannot insert a tag into itself.")
+        if (isinstance(new_child, basestring)
+            and not isinstance(new_child, NavigableString)):
+            new_child = NavigableString(new_child)  # wrap plain strings
+
+        position = min(position, len(self.contents))  # past-the-end means append
+        if hasattr(new_child, 'parent') and new_child.parent is not None:
+            # We're 'inserting' an element that's already one
+            # of this object's children.
+            if new_child.parent is self:
+                current_index = self.index(new_child)
+                if current_index < position:
+                    # We're moving this element further down the list
+                    # of this object's children. That means that when
+                    # we extract this element, our target index will
+                    # jump down one.
+                    position -= 1
+            new_child.extract()  # detach it from its current parent first
+
+        new_child.parent = self
+        previous_child = None
+        if position == 0:
+            # First child: no previous sibling, and self directly precedes it.
+            new_child.previous_sibling = None
+            new_child.previous_element = self
+        else:
+            previous_child = self.contents[position - 1]
+            new_child.previous_sibling = previous_child
+            new_child.previous_sibling.next_sibling = new_child
+            new_child.previous_element = previous_child._last_descendant()
+        if new_child.previous_element is not None:
+            new_child.previous_element.next_element = new_child
+
+        new_childs_last_element = new_child._last_descendant()
+
+        if position >= len(self.contents):  # new_child becomes the last child
+            new_child.next_sibling = None
+
+            parent = self  # climb ancestors until one has a next sibling
+            parents_next_sibling = None
+            while parents_next_sibling is None and parent is not None:
+                parents_next_sibling = parent.next_sibling
+                parent = parent.parent
+                if parents_next_sibling is not None:
+                    # We found the element that comes next in the document.
+                    break
+            if parents_next_sibling is not None:
+                new_childs_last_element.next_element = parents_next_sibling
+            else:
+                # The last element of this tag is the last element in
+                # the document.
+                new_childs_last_element.next_element = None
+        else:
+            next_child = self.contents[position]  # the child being pushed back
+            new_child.next_sibling = next_child
+            if new_child.next_sibling is not None:
+                new_child.next_sibling.previous_sibling = new_child
+            new_childs_last_element.next_element = next_child
+
+        if new_childs_last_element.next_element is not None:
+            new_childs_last_element.next_element.previous_element = new_childs_last_element
+        self.contents.insert(position, new_child)  # finally update the child list
+
+    def append(self, tag):
+        """Insert `tag` after the last of this tag's current contents."""
+        self.insert(len(self.contents), tag)
+
+    def insert_before(self, predecessor):
+        """Place `predecessor` directly ahead of this element.
+
+        Afterwards both share this element's parent. Raises ValueError
+        if given this element itself or if this element has no parent.
+        """
+        if self is predecessor:
+            raise ValueError("Can't insert an element before itself.")
+        home = self.parent
+        if home is None:
+            raise ValueError(
+                "Element has no parent, so 'before' has no meaning.")
+        if isinstance(predecessor, PageElement):
+            # Detach it first: were it already a sibling, removing it
+            # later would shift the index computed below.
+            predecessor.extract()
+        slot = home.index(self)
+        home.insert(slot, predecessor)
+
+    def insert_after(self, successor):
+        """Place `successor` directly behind this element.
+
+        Afterwards both share this element's parent. Raises ValueError
+        if given this element itself or if this element has no parent.
+        """
+        if self is successor:
+            raise ValueError("Can't insert an element after itself.")
+        home = self.parent
+        if home is None:
+            raise ValueError(
+                "Element has no parent, so 'after' has no meaning.")
+        if isinstance(successor, PageElement):
+            # Detach it first: were it already a sibling, removing it
+            # later would shift the index computed below.
+            successor.extract()
+        slot = home.index(self) + 1
+        home.insert(slot, successor)
+
+    def find_next(self, name=None, attrs={}, text=None, **kwargs):
+        """Return the first match that comes after this Tag in the
+        document, or None when nothing matches."""
+        return self._find_one(self.find_all_next, name, attrs, text, **kwargs)
+    findNext = find_next  # BS3
+
+    def find_all_next(self, name=None, attrs={}, text=None, limit=None,
+                    **kwargs):
+        """Return every match found after this Tag in the document,
+        stopping early once `limit` results have been collected."""
+        following = self.next_elements
+        return self._find_all(name, attrs, text, limit, following, **kwargs)
+    findAllNext = find_all_next  # BS3
+
+    def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs):
+        """Return the nearest later sibling of this Tag that matches
+        the given criteria, or None when there is none."""
+        search = self.find_next_siblings
+        return self._find_one(search, name, attrs, text, **kwargs)
+    findNextSibling = find_next_sibling  # BS3
+
+    def find_next_siblings(self, name=None, attrs={}, text=None, limit=None,
+                           **kwargs):
+        """Return the later siblings of this Tag that match the given
+        criteria, at most `limit` of them when a limit is given."""
+        later = self.next_siblings
+        return self._find_all(name, attrs, text, limit, later, **kwargs)
+    findNextSiblings = find_next_siblings   # BS3
+    fetchNextSiblings = find_next_siblings  # BS2
+
+    def find_previous(self, name=None, attrs={}, text=None, **kwargs):
+        """Return the first match that comes before this Tag in the
+        document, or None when nothing matches."""
+        search = self.find_all_previous
+        return self._find_one(search, name, attrs, text, **kwargs)
+    findPrevious = find_previous  # BS3
+
+    def find_all_previous(self, name=None, attrs={}, text=None, limit=None,
+                        **kwargs):
+        """Return every match found before this Tag in the document,
+        stopping early once `limit` results have been collected."""
+        earlier = self.previous_elements
+        return self._find_all(name, attrs, text, limit, earlier, **kwargs)
+    findAllPrevious = find_all_previous  # BS3
+    fetchPrevious = find_all_previous    # BS2
+
+    def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs):
+        """Return the nearest earlier sibling of this Tag that matches
+        the given criteria, or None when there is none."""
+        search = self.find_previous_siblings
+        return self._find_one(search, name, attrs, text, **kwargs)
+    findPreviousSibling = find_previous_sibling  # BS3
+
+    def find_previous_siblings(self, name=None, attrs={}, text=None,
+                               limit=None, **kwargs):
+        """Return the earlier siblings of this Tag that match the given
+        criteria, at most `limit` of them when a limit is given."""
+        earlier = self.previous_siblings
+        return self._find_all(name, attrs, text, limit, earlier, **kwargs)
+    findPreviousSiblings = find_previous_siblings   # BS3
+    fetchPreviousSiblings = find_previous_siblings  # BS2
+
+    def find_parent(self, name=None, attrs={}, **kwargs):
+        """Returns the closest parent of this Tag that matches the given
+        criteria, or None. Keyword filters are forwarded, not dropped."""
+        # NOTE: We can't use _find_one because findParents takes a different
+        # set of arguments.
+        r = None
+        l = self.find_parents(name, attrs, 1, **kwargs)
+        if l:
+            r = l[0]
+        return r
+    findParent = find_parent  # BS3
+
+    def find_parents(self, name=None, attrs={}, limit=None, **kwargs):
+        """Return the ancestors of this Tag that match the given
+        criteria, nearest first; no text filter is applied."""
+
+        ancestors = self.parents
+        return self._find_all(name, attrs, None, limit, ancestors, **kwargs)
+    findParents = find_parents   # BS3
+    fetchParents = find_parents  # BS2
+
+    @property
+    def next(self):
+        return self.next_element
+
+    @property
+    def previous(self):
+        return self.previous_element
+
+    #These methods do the real heavy lifting.
+
+    def _find_one(self, method, name, attrs, text, **kwargs):
+        """Call `method` with a limit of 1; return its first hit or None."""
+        matches = method(name, attrs, text, 1, **kwargs)
+        if matches:
+            return matches[0]
+        return None
+
+    def _find_all(self, name, attrs, text, limit, generator, **kwargs):
+        """Collect the elements from `generator` that match the criteria.
+
+        `name` may itself be a SoupStrainer; otherwise one is built from
+        name/attrs/text/kwargs. Stops once `limit` matches are found."""
+
+        unfiltered = text is None and not limit and not attrs and not kwargs
+        if isinstance(name, SoupStrainer):
+            strainer = name
+        elif unfiltered and (name is True or name is None):
+            # Fast path: every Tag matches.
+            return [element for element in generator
+                    if isinstance(element, Tag)]
+        elif unfiltered and isinstance(name, basestring):
+            # Fast path: Tags with exactly this name.
+            return [element for element in generator
+                    if isinstance(element, Tag) and element.name == name]
+        else:
+            # General case: build a SoupStrainer from the criteria.
+            strainer = SoupStrainer(name, attrs, text, **kwargs)
+
+        results = ResultSet(strainer)
+        for candidate in generator:
+            if not candidate:
+                # Falsy elements are never offered to the strainer.
+                continue
+            found = strainer.search(candidate)
+            if found:
+                results.append(found)
+                if limit and len(results) >= limit:
+                    break
+        return results
+
+    #These generators can be used to navigate starting from both
+    #NavigableStrings and Tags.
+    @property
+    def next_elements(self):
+        node = self.next_element  # follow the next_element chain to its end
+        while node is not None:
+            yield node
+            node = node.next_element
+
+    @property
+    def next_siblings(self):
+        node = self.next_sibling  # follow the next_sibling chain to its end
+        while node is not None:
+            yield node
+            node = node.next_sibling
+
+    @property
+    def previous_elements(self):
+        node = self.previous_element  # follow previous_element back to the start
+        while node is not None:
+            yield node
+            node = node.previous_element
+
+    @property
+    def previous_siblings(self):
+        node = self.previous_sibling  # follow previous_sibling back to the start
+        while node is not None:
+            yield node
+            node = node.previous_sibling
+
+    @property
+    def parents(self):
+        node = self.parent  # climb from the immediate parent to the root
+        while node is not None:
+            yield node
+            node = node.parent
+
+    # Methods for supporting CSS selectors.
+
+    tag_name_re = re.compile('^[a-z0-9]+$')
+
+    # /^(\w+)\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
+    #   \---/  \---/\-------------/    \-------/
+    #     |      |         |               |
+    #     |      |         |           The value
+    #     |      |    ~,|,^,$,* or =
+    #     |   Attribute
+    #    Tag
+    attribselect_re = re.compile(
+        r'^(?P<tag>\w+)?\[(?P<attribute>\w+)(?P<operator>[=~\|\^\$\*]?)' +
+        r'=?"?(?P<value>[^\]"]*)"?\]$'
+        )
+
+    def _attr_value_as_string(self, value, default=None):
+        """Force an attribute value into a string representation.
+
+        A multi-valued attribute will be converted into a
+        space-separated string.
+        """
+        value = self.get(value, default)
+        if isinstance(value, list) or isinstance(value, tuple):
+            value =" ".join(value)
+        return value
+
+    def _attribute_checker(self, operator, attribute, value=''):
+        """Create a function that performs a CSS selector operation.
+
+        Takes an operator, attribute and optional value. Returns a
+        function that will return True for elements that match that
+        combination.
+        """
+        if operator == '=':
+            # string representation of `attribute` is equal to `value`
+            return lambda el: el._attr_value_as_string(attribute) == value
+        elif operator == '~':
+            # space-separated list representation of `attribute`
+            # contains `value`
+            def _includes_value(element):
+                attribute_value = element.get(attribute, [])
+                if not isinstance(attribute_value, list):
+                    attribute_value = attribute_value.split()
+                return value in attribute_value
+            return _includes_value
+        elif operator == '^':
+            # string representation of `attribute` starts with `value`
+            return lambda el: el._attr_value_as_string(
+                attribute, '').startswith(value)
+        elif operator == '$':
+            # string representation of `attribute` ends with `value`
+            return lambda el: el._attr_value_as_string(
+                attribute, '').endswith(value)
+        elif operator == '*':
+            # string representation of `attribute` contains `value`
+            return lambda el: value in el._attr_value_as_string(attribute, '')
+        elif operator == '|':
+            # string representation of `attribute` is either exactly
+            # `value` or starts with `value` and then a dash.
+            def _is_or_starts_with_dash(element):
+                attribute_value = element._attr_value_as_string(attribute, '')
+                return (attribute_value == value or attribute_value.startswith(
+                        value + '-'))
+            return _is_or_starts_with_dash
+        else:
+            return lambda el: el.has_attr(attribute)
+
+    def select(self, selector):
+        """Perform a CSS selection operation on the current element."""
+        tokens = selector.split()
+        current_context = [self]
+        for index, token in enumerate(tokens):
+            if tokens[index - 1] == '>':
+                # already found direct descendants in last step. skip this
+                # step.
+                continue
+            m = self.attribselect_re.match(token)
+            if m is not None:
+                # Attribute selector
+                tag, attribute, operator, value = m.groups()
+                if not tag:
+                    tag = True
+                checker = self._attribute_checker(operator, attribute, value)
+                found = []
+                for context in current_context:
+                    found.extend(
+                        [el for el in context.find_all(tag) if checker(el)])
+                current_context = found
+                continue
+
+            if '#' in token:
+                # ID selector
+                tag, id = token.split('#', 1)
+                if tag == "":
+                    tag = True
+                el = current_context[0].find(tag, {'id': id})
+                if el is None:
+                    return [] # No match
+                current_context = [el]
+                continue
+
+            if '.' in token:
+                # Class selector
+                tag_name, klass = token.split('.', 1)
+                if not tag_name:
+                    tag_name = True
+                classes = set(klass.split('.'))
+                found = []
+                def classes_match(tag):
+                    if tag_name is not True and tag.name != tag_name:
+                        return False
+                    if not tag.has_attr('class'):
+                        return False
+                    return classes.issubset(tag['class'])
+                for context in current_context:
+                    found.extend(context.find_all(classes_match))
+                current_context = found
+                continue
+
+            if token == '*':
+                # Star selector
+                found = []
+                for context in current_context:
+                    found.extend(context.findAll(True))
+                current_context = found
+                continue
+
+            if token == '>':
+                # Child selector
+                tag = tokens[index + 1]
+                if not tag:
+                    tag = True
+
+                found = []
+                for context in current_context:
+                    found.extend(context.find_all(tag, recursive=False))
+                current_context = found
+                continue
+
+            # Here we should just have a regular tag
+            if not self.tag_name_re.match(token):
+                return []
+            found = []
+            for context in current_context:
+                found.extend(context.findAll(token))
+            current_context = found
+        return current_context
+
+    # Old non-property versions of the generators, for backwards
+    # compatibility with BS3.
+    def nextGenerator(self):
+        return self.next_elements
+
+    def nextSiblingGenerator(self):
+        return self.next_siblings
+
+    def previousGenerator(self):
+        return self.previous_elements
+
+    def previousSiblingGenerator(self):
+        return self.previous_siblings
+
+    def parentGenerator(self):
+        return self.parents
+
+
+class NavigableString(unicode, PageElement):
+
+    PREFIX = ''
+    SUFFIX = ''
+
+    def __new__(cls, value):
+        """Create a new NavigableString.
+
+        When unpickling a NavigableString, this method is called with
+        the string in DEFAULT_OUTPUT_ENCODING. That encoding needs to be
+        passed in to the superclass's __new__ or the superclass won't know
+        how to handle non-ASCII characters.
+        """
+        if isinstance(value, unicode):
+            return unicode.__new__(cls, value)
+        return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
+
+    def __getnewargs__(self):
+        return (unicode(self),)
+
+    def __getattr__(self, attr):
+        """text.string gives you text. This is for backwards
+        compatibility for Navigable*String, but for CData* it lets you
+        get the string without the CData wrapper."""
+        if attr == 'string':
+            return self
+        else:
+            raise AttributeError(
+                "'%s' object has no attribute '%s'" % (
+                    self.__class__.__name__, attr))
+
+    def output_ready(self, formatter="minimal"):
+        output = self.format_string(self, formatter)
+        return self.PREFIX + output + self.SUFFIX
+
+
+class PreformattedString(NavigableString):
+    """A NavigableString not subject to the normal formatting rules.
+
+    The string will be passed into the formatter (to trigger side effects),
+    but the return value will be ignored.
+    """
+
+    def output_ready(self, formatter="minimal"):
+        """CData strings are passed into the formatter.
+        But the return value is ignored."""
+        self.format_string(self, formatter)
+        return self.PREFIX + self + self.SUFFIX
+
+class CData(PreformattedString):
+
+    PREFIX = u'<![CDATA['
+    SUFFIX = u']]>'
+
+class ProcessingInstruction(PreformattedString):
+
+    PREFIX = u'<?'
+    SUFFIX = u'?>'
+
+class Comment(PreformattedString):
+
+    PREFIX = u'<!--'
+    SUFFIX = u'-->'
+
+
+class Declaration(PreformattedString):
+    PREFIX = u'<!'
+    SUFFIX = u'!>'
+
+
+class Doctype(PreformattedString):
+
+    @classmethod
+    def for_name_and_ids(cls, name, pub_id, system_id):
+        value = name
+        if pub_id is not None:
+            value += ' PUBLIC "%s"' % pub_id
+            if system_id is not None:
+                value += ' "%s"' % system_id
+        elif system_id is not None:
+            value += ' SYSTEM "%s"' % system_id
+
+        return Doctype(value)
+
+    PREFIX = u'<!DOCTYPE '
+    SUFFIX = u'>\n'
+
+
+class Tag(PageElement):
+
+    """Represents a found HTML tag with its attributes and contents."""
+
+    def __init__(self, parser=None, builder=None, name=None, namespace=None,
+                 prefix=None, attrs=None, parent=None, previous=None):
+        "Basic constructor; `builder` is optional (attrs are then copied)."
+
+        if parser is None:
+            self.parser_class = None
+        else:
+            # We don't actually store the parser object: that lets extracted
+            # chunks be garbage-collected.
+            self.parser_class = parser.__class__
+        if name is None:
+            raise ValueError("No value provided for new tag's name.")
+        self.name = name
+        self.namespace = namespace
+        self.prefix = prefix
+        if attrs is None:
+            attrs = {}
+        elif builder is not None and builder.cdata_list_attributes:
+            attrs = builder._replace_cdata_list_attribute_values(
+                self.name, attrs)
+        else:
+            attrs = dict(attrs)
+        self.attrs = attrs
+        self.contents = []
+        self.setup(parent, previous)
+        self.hidden = False
+
+        # Set up any substitutions, such as the charset in a META tag.
+        if builder is not None:
+            builder.set_up_substitutions(self)
+            self.can_be_empty_element = builder.can_be_empty_element(name)
+        else:
+            self.can_be_empty_element = False
+
+    parserClass = _alias("parser_class")  # BS3
+
+    @property
+    def is_empty_element(self):
+        """Is this tag an empty-element tag? (aka a self-closing tag)
+
+        A tag that has contents is never an empty-element tag.
+
+        A tag that has no contents may or may not be an empty-element
+        tag. It depends on the builder used to create the tag. If the
+        builder has a designated list of empty-element tags, then only
+        a tag whose name shows up in that list is considered an
+        empty-element tag.
+
+        If the builder has no designated list of empty-element tags,
+        then any tag with no contents is an empty-element tag.
+        """
+        return len(self.contents) == 0 and self.can_be_empty_element
+    isSelfClosing = is_empty_element  # BS3
+
+    @property
+    def string(self):
+        """Convenience property to get the single string within this tag.
+
+        :Return: If this tag has a single string child, return value
+         is that string. If this tag has no children, or more than one
+         child, return value is None. If this tag has one child tag,
+         return value is the 'string' attribute of the child tag,
+         recursively.
+        """
+        if len(self.contents) != 1:
+            return None
+        child = self.contents[0]
+        if isinstance(child, NavigableString):
+            return child
+        return child.string
+
+    @string.setter
+    def string(self, string):
+        self.clear()
+        self.append(string.__class__(string))
+
+    def _all_strings(self, strip=False):
+        """Yield all child strings, possibly stripping them."""
+        for descendant in self.descendants:
+            if not isinstance(descendant, NavigableString):
+                continue
+            if strip:
+                descendant = descendant.strip()
+                if len(descendant) == 0:
+                    continue
+            yield descendant
+    strings = property(_all_strings)
+
+    @property
+    def stripped_strings(self):
+        for string in self._all_strings(True):
+            yield string
+
+    def get_text(self, separator="", strip=False):
+        """
+        Get all child strings, concatenated using the given separator.
+        """
+        return separator.join([s for s in self._all_strings(strip)])
+    getText = get_text
+    text = property(get_text)
+
+    def decompose(self):
+        """Recursively destroys the contents of this tree."""
+        self.extract()
+        i = self
+        while i is not None:
+            next = i.next_element
+            i.__dict__.clear()
+            i = next
+
+    def clear(self, decompose=False):
+        """
+        Extract all children. If decompose is True, decompose instead.
+        """
+        if decompose:
+            for element in self.contents[:]:
+                if isinstance(element, Tag):
+                    element.decompose()
+                else:
+                    element.extract()
+        else:
+            for element in self.contents[:]:
+                element.extract()
+
+    def index(self, element):
+        """
+        Find the index of a child by identity, not value. Avoids issues with
+        tag.contents.index(element) getting the index of equal elements.
+        """
+        for i, child in enumerate(self.contents):
+            if child is element:
+                return i
+        raise ValueError("Tag.index: element not in tag")
+
+    def get(self, key, default=None):
+        """Returns the value of the 'key' attribute for the tag, or
+        the value given for 'default' if it doesn't have that
+        attribute."""
+        return self.attrs.get(key, default)
+
+    def has_attr(self, key):
+        return key in self.attrs
+
+    def __hash__(self):
+        return str(self).__hash__()
+
+    def __getitem__(self, key):
+        """tag[key] returns the value of the 'key' attribute for the tag,
+        and throws an exception if it's not there."""
+        return self.attrs[key]
+
+    def __iter__(self):
+        "Iterating over a tag iterates over its contents."
+        return iter(self.contents)
+
+    def __len__(self):
+        "The length of a tag is the length of its list of contents."
+        return len(self.contents)
+
+    def __contains__(self, x):
+        return x in self.contents
+
+    def __nonzero__(self):
+        "A tag is non-None even if it has no contents."
+        return True
+
+    def __setitem__(self, key, value):
+        """Setting tag[key] sets the value of the 'key' attribute for the
+        tag."""
+        self.attrs[key] = value
+
+    def __delitem__(self, key):
+        "Deleting tag[key] deletes all 'key' attributes for the tag."
+        self.attrs.pop(key, None)
+
+    def __call__(self, *args, **kwargs):
+        """Calling a tag like a function is the same as calling its
+        find_all() method. Eg. tag('a') returns a list of all the A tags
+        found within this tag."""
+        return self.find_all(*args, **kwargs)
+
+    def __getattr__(self, tag):
+        #print "Getattr %s.%s" % (self.__class__, tag)
+        if len(tag) > 3 and tag.endswith('Tag'):
+            # BS3: soup.aTag -> soup.find("a")
+            tag_name = tag[:-3]
+            warnings.warn(
+                '.%sTag is deprecated, use .find("%s") instead.' % (
+                    tag_name, tag_name))
+            return self.find(tag_name)
+        # We special case contents to avoid recursion.
+        elif not tag.startswith("__") and not tag=="contents":
+            return self.find(tag)
+        raise AttributeError(
+            "'%s' object has no attribute '%s'" % (self.__class__, tag))
+
+    def __eq__(self, other):
+        """Returns true iff this tag has the same name, the same attributes,
+        and the same contents (recursively) as the given tag."""
+        if self is other:
+            return True
+        if (not hasattr(other, 'name') or
+            not hasattr(other, 'attrs') or
+            not hasattr(other, 'contents') or
+            self.name != other.name or
+            self.attrs != other.attrs or
+            len(self) != len(other)):
+            return False
+        for i, my_child in enumerate(self.contents):
+            if my_child != other.contents[i]:
+                return False
+        return True
+
+    def __ne__(self, other):
+        """Returns true iff this tag is not identical to the other tag,
+        as defined in __eq__."""
+        return not self == other
+
+    def __repr__(self, encoding=DEFAULT_OUTPUT_ENCODING):
+        """Renders this tag as a string."""
+        return self.encode(encoding)
+
+    def __unicode__(self):
+        return self.decode()
+
+    def __str__(self):
+        return self.encode()
+
+    if PY3K:
+        __str__ = __repr__ = __unicode__
+
+    def encode(self, encoding=DEFAULT_OUTPUT_ENCODING,
+               indent_level=None, formatter="minimal",
+               errors="xmlcharrefreplace"):
+        # Turn the data structure into Unicode, then encode the
+        # Unicode.
+        u = self.decode(indent_level, encoding, formatter)
+        return u.encode(encoding, errors)
+
+    def decode(self, indent_level=None,
+               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+               formatter="minimal"):
+        """Returns a Unicode representation of this tag and its contents.
+
+        :param eventual_encoding: The tag is destined to be
+           encoded into this encoding. This method is _not_
+           responsible for performing that encoding. This information
+           is passed in so that it can be substituted in if the
+           document contains a <META> tag that mentions the document's
+           encoding.
+        """
+        attrs = []
+        if self.attrs:
+            for key, val in sorted(self.attrs.items()):
+                if val is None:
+                    decoded = key
+                else:
+                    if isinstance(val, list) or isinstance(val, tuple):
+                        val = ' '.join(val)
+                    elif not isinstance(val, basestring):
+                        val = str(val)
+                    elif (
+                        isinstance(val, AttributeValueWithCharsetSubstitution)
+                        and eventual_encoding is not None):
+                        val = val.encode(eventual_encoding)
+
+                    text = self.format_string(val, formatter)
+                    decoded = (
+                        str(key) + '='
+                        + EntitySubstitution.quoted_attribute_value(text))
+                attrs.append(decoded)
+        close = ''
+        closeTag = ''
+        if self.is_empty_element:
+            close = '/'
+        else:
+            closeTag = '</%s>' % self.name
+
+        prefix = ''
+        if self.prefix:
+            prefix = self.prefix + ":"
+
+        pretty_print = (indent_level is not None)
+        if pretty_print:
+            space = (' ' * (indent_level - 1))
+            indent_contents = indent_level + 1
+        else:
+            space = ''
+            indent_contents = None
+        contents = self.decode_contents(
+            indent_contents, eventual_encoding, formatter)
+
+        if self.hidden:
+            # This is the 'document root' object.
+            s = contents
+        else:
+            s = []
+            attribute_string = ''
+            if attrs:
+                attribute_string = ' ' + ' '.join(attrs)
+            if pretty_print:
+                s.append(space)
+            s.append('<%s%s%s%s>' % (
+                    prefix, self.name, attribute_string, close))
+            if pretty_print:
+                s.append("\n")
+            s.append(contents)
+            if pretty_print and contents and contents[-1] != "\n":
+                s.append("\n")
+            if pretty_print and closeTag:
+                s.append(space)
+            s.append(closeTag)
+            if pretty_print and closeTag and self.next_sibling:
+                s.append("\n")
+            s = ''.join(s)
+        return s
+
+    def prettify(self, encoding=None, formatter="minimal"):
+        if encoding is None:
+            return self.decode(True, formatter=formatter)
+        else:
+            return self.encode(encoding, True, formatter=formatter)
+
+    def decode_contents(self, indent_level=None,
+                       eventual_encoding=DEFAULT_OUTPUT_ENCODING,
+                       formatter="minimal"):
+        """Renders the contents of this tag as a Unicode string.
+
+        :param eventual_encoding: The tag is destined to be
+           encoded into this encoding. This method is _not_
+           responsible for performing that encoding. This information
+           is passed in so that it can be substituted in if the
+           document contains a <META> tag that mentions the document's
+           encoding.
+        """
+        pretty_print = (indent_level is not None)
+        s = []
+        for c in self:
+            text = None
+            if isinstance(c, NavigableString):
+                text = c.output_ready(formatter)
+            elif isinstance(c, Tag):
+                s.append(c.decode(indent_level, eventual_encoding,
+                                  formatter))
+            if text and indent_level:
+                text = text.strip()
+            if text:
+                if pretty_print:
+                    s.append(" " * (indent_level - 1))
+                s.append(text)
+                if pretty_print:
+                    s.append("\n")
+        return ''.join(s)
+
+    def encode_contents(
+        self, indent_level=None, encoding=DEFAULT_OUTPUT_ENCODING,
+        formatter="minimal"):
+        """Renders the contents of this tag as a bytestring."""
+        contents = self.decode_contents(indent_level, encoding, formatter)
+        return contents.encode(encoding)
+
+    # Old method for BS3 compatibility
+    def renderContents(self, encoding=DEFAULT_OUTPUT_ENCODING,
+                       prettyPrint=False, indentLevel=0):
+        if not prettyPrint:
+            indentLevel = None
+        return self.encode_contents(
+            indent_level=indentLevel, encoding=encoding)
+
+    #Soup methods
+
+    def find(self, name=None, attrs={}, recursive=True, text=None,
+             **kwargs):
+        """Return only the first child of this Tag matching the given
+        criteria."""
+        r = None
+        l = self.find_all(name, attrs, recursive, text, 1, **kwargs)
+        if l:
+            r = l[0]
+        return r
+    findChild = find
+
+    def find_all(self, name=None, attrs={}, recursive=True, text=None,
+                 limit=None, **kwargs):
+        """Extracts a list of Tag objects that match the given
+        criteria.  You can specify the name of the Tag and any
+        attributes you want the Tag to have.
+
+        The value of a key-value pair in the 'attrs' map can be a
+        string, a list of strings, a regular expression object, or a
+        callable that takes a string and returns whether or not the
+        string matches for some custom definition of 'matches'. The
+        same is true of the tag name."""
+        generator = self.descendants
+        if not recursive:
+            generator = self.children
+        return self._find_all(name, attrs, text, limit, generator, **kwargs)
+    findAll = find_all       # BS3
+    findChildren = find_all  # BS2
+
+    #Generator methods
+    @property
+    def children(self):
+        # return iter() to make the purpose of the method clear
+        return iter(self.contents)  # XXX This seems to be untested.
+
+    @property
+    def descendants(self):
+        if not len(self.contents):
+            return
+        stopNode = self._last_descendant().next_element
+        current = self.contents[0]
+        while current is not stopNode:
+            yield current
+            current = current.next_element
+
+    # Old names for backwards compatibility
+    def childGenerator(self):
+        return self.children
+
+    def recursiveChildGenerator(self):
+        return self.descendants
+
+    # This was kind of misleading because has_key() (attributes) was
+    # different from __contains__ (contents). has_key() is gone in Python 3,
+    # anyway.
+    has_key = has_attr
+
+# Next, a couple classes to represent queries and their results.
+class SoupStrainer(object):
+    """Encapsulates a number of ways of matching a markup element (tag or
+    text)."""
+
+    def __init__(self, name=None, attrs={}, text=None, **kwargs):
+        self.name = self._normalize_search_value(name)
+        if not isinstance(attrs, dict):
+            # Treat a non-dict value for attrs as a search for the 'class'
+            # attribute.
+            kwargs['class'] = attrs
+            attrs = None
+
+        if kwargs:
+            if attrs:
+                attrs = attrs.copy()
+                attrs.update(kwargs)
+            else:
+                attrs = kwargs
+        normalized_attrs = {}
+        for key, value in attrs.items():
+            normalized_attrs[key] = self._normalize_search_value(value)
+
+        self.attrs = normalized_attrs
+        self.text = self._normalize_search_value(text)
+
+    def _normalize_search_value(self, value):
+        # Leave it alone if it's a Unicode string, a callable, a
+        # regular expression, a boolean, or None.
+        if (isinstance(value, unicode) or callable(value) or hasattr(value, 'match')
+            or isinstance(value, bool) or value is None):
+            return value
+
+        # If it's a bytestring, convert it to Unicode, treating it as UTF-8.
+        if isinstance(value, bytes):
+            return value.decode("utf8")
+
+        # If it's listlike, convert it into a list of strings.
+        if hasattr(value, '__iter__'):
+            new_value = []
+            for v in value:
+                if (hasattr(v, '__iter__') and not isinstance(v, bytes)
+                    and not isinstance(v, unicode)):
+                    # This is almost certainly the user's mistake. In the
+                    # interests of avoiding infinite loops, we'll let
+                    # it through as-is rather than doing a recursive call.
+                    new_value.append(v)
+                else:
+                    new_value.append(self._normalize_search_value(v))
+            return new_value
+
+        # Otherwise, convert it into a Unicode string.
+        # The unicode(str()) thing is so this will do the same thing on Python 2
+        # and Python 3.
+        return unicode(str(value))
+
+    def __str__(self):
+        if self.text:
+            return self.text
+        else:
+            return "%s|%s" % (self.name, self.attrs)
+
+    def search_tag(self, markup_name=None, markup_attrs={}):
+        found = None
+        markup = None
+        if isinstance(markup_name, Tag):
+            markup = markup_name
+            markup_attrs = markup
+        call_function_with_tag_data = (
+            isinstance(self.name, collections.Callable)
+            and not isinstance(markup_name, Tag))
+
+        if ((not self.name)
+            or call_function_with_tag_data
+            or (markup and self._matches(markup, self.name))
+            or (not markup and self._matches(markup_name, self.name))):
+            if call_function_with_tag_data:
+                match = self.name(markup_name, markup_attrs)
+            else:
+                match = True
+                markup_attr_map = None
+                for attr, match_against in list(self.attrs.items()):
+                    if not markup_attr_map:
+                        if hasattr(markup_attrs, 'get'):
+                            markup_attr_map = markup_attrs
+                        else:
+                            markup_attr_map = {}
+                            for k, v in markup_attrs:
+                                markup_attr_map[k] = v
+                    attr_value = markup_attr_map.get(attr)
+                    if not self._matches(attr_value, match_against):
+                        match = False
+                        break
+            if match:
+                if markup:
+                    found = markup
+                else:
+                    found = markup_name
+        if found and self.text and not self._matches(found.string, self.text):
+            found = None
+        return found
+    searchTag = search_tag
+
+    def search(self, markup):
+        # print 'looking for %s in %s' % (self, markup)
+        found = None
+        # If given a list of items, scan it for a text element that
+        # matches.
+        if hasattr(markup, '__iter__') and not isinstance(markup, (Tag, basestring)):
+            for element in markup:
+                if isinstance(element, NavigableString) \
+                       and self.search(element):
+                    found = element
+                    break
+        # If it's a Tag, make sure its name or attributes match.
+        # Don't bother with Tags if we're searching for text.
+        elif isinstance(markup, Tag):
+            if not self.text or self.name or self.attrs:
+                found = self.search_tag(markup)
+        # If it's text, make sure the text matches.
+        elif isinstance(markup, NavigableString) or \
+                 isinstance(markup, basestring):
+            if not self.name and not self.attrs and self._matches(markup, self.text):
+                found = markup
+        else:
+            raise Exception(
+                "I don't know how to match against a %s" % markup.__class__)
+        return found
+
+    def _matches(self, markup, match_against):
+        # print u"Matching %s against %s" % (markup, match_against)
+        result = False
+        if isinstance(markup, list) or isinstance(markup, tuple):
+            # This should only happen when searching a multi-valued attribute
+            # like 'class'.
+            if (isinstance(match_against, unicode)
+                and ' ' in match_against):
+                # A bit of a special case. If they try to match "foo
+                # bar" on a multivalue attribute's value, only accept
+                # the literal value "foo bar"
+                #
+                # XXX This is going to be pretty slow because we keep
+                # splitting match_against. But it shouldn't come up
+                # too often.
+                return (whitespace_re.split(match_against) == markup)
+            else:
+                for item in markup:
+                    if self._matches(item, match_against):
+                        return True
+                return False
+
+        if match_against is True:
+            # True matches any non-None value.
+            return markup is not None
+
+        if isinstance(match_against, collections.Callable):
+            return match_against(markup)
+
+        # Custom callables take the tag as an argument, but all
+        # other ways of matching match the tag name as a string.
+        if isinstance(markup, Tag):
+            markup = markup.name
+
+        # Ensure that `markup` is either a Unicode string, or None.
+        markup = self._normalize_search_value(markup)
+
+        if markup is None:
+            # None matches None, False, an empty string, an empty list, and so on.
+            return not match_against
+
+        if isinstance(match_against, unicode):
+            # Exact string match
+            return markup == match_against
+
+        if hasattr(match_against, 'match'):
+            # Regexp match
+            return match_against.search(markup)
+
+        if hasattr(match_against, '__iter__'):
+            # The markup must be an exact match against something
+            # in the iterable.
+            return markup in match_against
+
+
+class ResultSet(list):
+    """A ResultSet is just a list that keeps track of the SoupStrainer
+    that created it (stored as `source`); it always starts out empty."""
+    def __init__(self, source):
+        list.__init__(self)
+        self.source = source
diff --git a/lib/bs4/testing.py b/lib/bs4/testing.py
new file mode 100644
index 00000000..5a84b0ba
--- /dev/null
+++ b/lib/bs4/testing.py
@@ -0,0 +1,515 @@
+"""Helper classes for tests."""
+
+import copy
+import functools
+import unittest
+from unittest import TestCase
+from bs4 import BeautifulSoup
+from bs4.element import (
+    CharsetMetaAttributeValue,
+    Comment,
+    ContentMetaAttributeValue,
+    Doctype,
+    SoupStrainer,
+)
+
+from bs4.builder import HTMLParserTreeBuilder
+default_builder = HTMLParserTreeBuilder
+
+
+class SoupTest(unittest.TestCase):
+
+    @property
+    def default_builder(self):
+        return default_builder()
+
+    def soup(self, markup, **kwargs):
+        """Build a Beautiful Soup object from markup."""
+        builder = kwargs.pop('builder', self.default_builder)
+        return BeautifulSoup(markup, builder=builder, **kwargs)
+
+    def document_for(self, markup):
+        """Turn an HTML fragment into a document.
+
+        The details depend on the builder.
+        """
+        return self.default_builder.test_fragment_to_document(markup)
+
+    def assertSoupEquals(self, to_parse, compare_parsed_to=None):
+        builder = self.default_builder
+        obj = BeautifulSoup(to_parse, builder=builder)
+        if compare_parsed_to is None:
+            compare_parsed_to = to_parse
+
+        self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
+
+
+class HTMLTreeBuilderSmokeTest(object):
+
+    """A basic test of a treebuilder's competence.
+
+    Any HTML treebuilder, present or future, should be able to pass
+    these tests. With invalid markup, there's room for interpretation,
+    and different parsers can handle it differently. But with the
+    markup in these tests, there's not much room for interpretation.
+    """
+
+    def assertDoctypeHandled(self, doctype_fragment):
+        """Assert that a given doctype string is handled correctly."""
+        doctype_str, soup = self._document_with_doctype(doctype_fragment)
+
+        # Make sure a Doctype object was created.
+        doctype = soup.contents[0]
+        self.assertEqual(doctype.__class__, Doctype)
+        self.assertEqual(doctype, doctype_fragment)
+        self.assertEqual(str(soup)[:len(doctype_str)], doctype_str)
+
+        # Make sure that the doctype was correctly associated with the
+        # parse tree and that the rest of the document parsed.
+        self.assertEqual(soup.p.contents[0], 'foo')
+
+    def _document_with_doctype(self, doctype_fragment):
+        """Generate and parse a document with the given doctype."""
+        doctype = '<!DOCTYPE %s>' % doctype_fragment
+        markup = doctype + '\n<p>foo</p>'
+        soup = self.soup(markup)
+        return doctype, soup
+
+    def test_normal_doctypes(self):
+        """Make sure normal, everyday HTML doctypes are handled correctly."""
+        self.assertDoctypeHandled("html")
+        self.assertDoctypeHandled(
+            'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"')
+
+    def test_public_doctype_with_url(self):
+        doctype = 'html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"'
+        self.assertDoctypeHandled(doctype)
+
+    def test_system_doctype(self):
+        self.assertDoctypeHandled('foo SYSTEM "http://www.example.com/"')
+
+    def test_namespaced_system_doctype(self):
+        # We can handle a namespaced doctype with a system ID.
+        self.assertDoctypeHandled('xsl:stylesheet SYSTEM "htmlent.dtd"')
+
+    def test_namespaced_public_doctype(self):
+        # Test a namespaced doctype with a public id.
+        self.assertDoctypeHandled('xsl:stylesheet PUBLIC "htmlent.dtd"')
+
+    def test_real_xhtml_document(self):
+        """A real XHTML document should come out more or less the same as it went in."""
+        markup = b"""<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Hello.</title></head>
+<body>Goodbye.</body>
+</html>"""
+        soup = self.soup(markup)
+        self.assertEqual(
+            soup.encode("utf-8").replace(b"\n", b""),
+            markup.replace(b"\n", b""))
+
+    def test_deepcopy(self):
+        """Make sure you can copy the tree builder.
+
+        This is important because the builder is part of a
+        BeautifulSoup object, and we want to be able to copy that.
+        """
+        copy.deepcopy(self.default_builder)
+
+    def test_p_tag_is_never_empty_element(self):
+        """A <p> tag is never designated as an empty-element tag.
+
+        Even if the markup shows it as an empty-element tag, it
+        shouldn't be presented that way.
+        """
+        soup = self.soup("<p/>")
+        self.assertFalse(soup.p.is_empty_element)
+        self.assertEqual(str(soup.p), "<p></p>")
+
+    def test_unclosed_tags_get_closed(self):
+        """A tag that's not closed by the end of the document should be closed.
+
+        This applies to all tags except empty-element tags.
+        """
+        self.assertSoupEquals("<p>", "<p></p>")
+        self.assertSoupEquals("<b>", "<b></b>")
+
+        self.assertSoupEquals("<br>", "<br/>")
+
+    def test_br_is_always_empty_element_tag(self):
+        """A <br> tag is designated as an empty-element tag.
+
+        Some parsers treat <br></br> as one <br/> tag, some parsers as
+        two tags, but it should always be an empty-element tag.
+        """
+        soup = self.soup("<br></br>")
+        self.assertTrue(soup.br.is_empty_element)
+        self.assertEqual(str(soup.br), "<br/>")
+
+    def test_nested_formatting_elements(self):
+        self.assertSoupEquals("<em><em></em></em>")
+
+    def test_comment(self):
+        # Comments are represented as Comment objects.
+        markup = "<p>foo<!--foobar-->baz</p>"
+        self.assertSoupEquals(markup)
+
+        soup = self.soup(markup)
+        comment = soup.find(text="foobar")
+        self.assertEqual(comment.__class__, Comment)
+
+    def test_preserved_whitespace_in_pre_and_textarea(self):
+        """Whitespace must be preserved in <pre> and <textarea> tags."""
+        self.assertSoupEquals("<pre>   </pre>")
+        self.assertSoupEquals("<textarea> woo  </textarea>")
+
+    def test_nested_inline_elements(self):
+        """Inline elements can be nested indefinitely."""
+        b_tag = "<b>Inside a B tag</b>"
+        self.assertSoupEquals(b_tag)
+
+        nested_b_tag = "<p>A <i>nested <b>tag</b></i></p>"
+        self.assertSoupEquals(nested_b_tag)
+
+        double_nested_b_tag = "<p>A <a>doubly <i>nested <b>tag</b></i></a></p>"
+        self.assertSoupEquals(double_nested_b_tag)  # was re-checking nested_b_tag
+
+    def test_nested_block_level_elements(self):
+        """Block elements can be nested."""
+        soup = self.soup('<blockquote><p><b>Foo</b></p></blockquote>')
+        blockquote = soup.blockquote
+        self.assertEqual(blockquote.p.b.string, 'Foo')
+        self.assertEqual(blockquote.b.string, 'Foo')
+
+    def test_correctly_nested_tables(self):
+        """One table can go inside another one."""
+        markup = ('<table id="1">'
+                  '<tr>'
+                  "<td>Here's another table:"
+                  '<table id="2">'
+                  '<tr><td>foo</td></tr>'
+                  '</table></td>')
+
+        self.assertSoupEquals(
+            markup,
+            '<table id="1"><tr><td>Here\'s another table:'
+            '<table id="2"><tr><td>foo</td></tr></table>'
+            '</td></tr></table>')
+
+        self.assertSoupEquals(
+            "<table><thead><tr><td>Foo</td></tr></thead>"
+            "<tbody><tr><td>Bar</td></tr></tbody>"
+            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
+
+    def test_angle_brackets_in_attribute_values_are_escaped(self):
+        self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
+
+    def test_entities_in_attributes_converted_to_unicode(self):
+        expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
+        self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
+        self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
+        self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
+
+    def test_entities_in_text_converted_to_unicode(self):
+        expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
+        self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
+        self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
+        self.assertSoupEquals("<p>pi&ntilde;ata</p>", expect)
+
+    def test_quot_entity_converted_to_quotation_mark(self):
+        self.assertSoupEquals("<p>I said &quot;good day!&quot;</p>",
+                              '<p>I said "good day!"</p>')
+
+    def test_out_of_range_entity(self):
+        expect = u"\N{REPLACEMENT CHARACTER}"
+        self.assertSoupEquals("&#10000000000000;", expect)
+        self.assertSoupEquals("&#x10000000000000;", expect)
+        self.assertSoupEquals("&#1000000000;", expect)
+
+    def test_basic_namespaces(self):
+        """Parsers don't need to *understand* namespaces, but at the
+        very least they should not choke on namespaces or lose
+        data."""
+
+        markup = b'<html xmlns="http://www.w3.org/1999/xhtml" xmlns:mathml="http://www.w3.org/1998/Math/MathML" xmlns:svg="http://www.w3.org/2000/svg"><head></head><body><mathml:msqrt>4</mathml:msqrt><b svg:fill="red"></b></body></html>'
+        soup = self.soup(markup)
+        self.assertEqual(markup, soup.encode())
+        html = soup.html
+        self.assertEqual('http://www.w3.org/1999/xhtml', soup.html['xmlns'])
+        self.assertEqual(
+            'http://www.w3.org/1998/Math/MathML', soup.html['xmlns:mathml'])
+        self.assertEqual(
+            'http://www.w3.org/2000/svg', soup.html['xmlns:svg'])
+
+    def test_multivalued_attribute_value_becomes_list(self):
+        markup = b'<a class="foo bar">'
+        soup = self.soup(markup)
+        self.assertEqual(['foo', 'bar'], soup.a['class'])
+
+    #
+    # Generally speaking, tests below this point are more tests of
+    # Beautiful Soup than tests of the tree builders. But parsers are
+    # weird, so we run these tests separately for every tree builder
+    # to detect any differences between them.
+    #
+
+    def test_soupstrainer(self):
+        """Parsers should be able to work with SoupStrainers."""
+        strainer = SoupStrainer("b")
+        soup = self.soup("A <b>bold</b> <meta/> <i>statement</i>",
+                         parse_only=strainer)
+        self.assertEqual(soup.decode(), "<b>bold</b>")
+
+    def test_single_quote_attribute_values_become_double_quotes(self):
+        self.assertSoupEquals("<foo attr='bar'></foo>",
+                              '<foo attr="bar"></foo>')
+
+    def test_attribute_values_with_nested_quotes_are_left_alone(self):
+        text = """<foo attr='bar "brawls" happen'>a</foo>"""
+        self.assertSoupEquals(text)
+
+    def test_attribute_values_with_double_nested_quotes_get_quoted(self):
+        text = """<foo attr='bar "brawls" happen'>a</foo>"""
+        soup = self.soup(text)
+        soup.foo['attr'] = 'Brawls happen at "Bob\'s Bar"'
+        self.assertSoupEquals(
+            soup.foo.decode(),
+            """<foo attr="Brawls happen at &quot;Bob\'s Bar&quot;">a</foo>""")
+
+    def test_ampersand_in_attribute_value_gets_escaped(self):
+        self.assertSoupEquals('<this is="really messed up & stuff"></this>',
+                              '<this is="really messed up &amp; stuff"></this>')
+
+        self.assertSoupEquals(
+            '<a href="http://example.org?a=1&b=2;3">foo</a>',
+            '<a href="http://example.org?a=1&amp;b=2;3">foo</a>')
+
+    def test_escaped_ampersand_in_attribute_value_is_left_alone(self):
+        self.assertSoupEquals('<a href="http://example.org?a=1&amp;b=2;3"></a>')
+
+    def test_entities_in_strings_converted_during_parsing(self):
+        # Both XML and HTML entities are converted to Unicode characters
+        # during parsing.
+        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
+        expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
+        self.assertSoupEquals(text, expected)
+
+    def test_smart_quotes_converted_on_the_way_in(self):
+        # Microsoft smart quotes are converted to Unicode characters during
+        # parsing.
+        quote = b"<p>\x91Foo\x92</p>"
+        soup = self.soup(quote)
+        self.assertEqual(
+            soup.p.string,
+            u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
+
+    def test_non_breaking_spaces_converted_on_the_way_in(self):
+        soup = self.soup("<a>&nbsp;&nbsp;</a>")
+        self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2)
+
+    def test_entities_converted_on_the_way_out(self):
+        text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
+        expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
+        soup = self.soup(text)
+        self.assertEqual(soup.p.encode("utf-8"), expected)
+
+    def test_real_iso_latin_document(self):
+        # Smoke test of interrelated functionality, using an
+        # easy-to-understand document.
+
+        # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
+        unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
+
+        # That's because we're going to encode it into ISO-Latin-1, and use
+        # that to test.
+        iso_latin_html = unicode_html.encode("iso-8859-1")
+
+        # Parse the ISO-Latin-1 HTML.
+        soup = self.soup(iso_latin_html)
+        # Encode it to UTF-8.
+        result = soup.encode("utf-8")
+
+        # What do we expect the result to look like? Well, it would
+        # look like unicode_html, except that the META tag would say
+        # UTF-8 instead of ISO-Latin-1.
+        expected = unicode_html.replace("ISO-Latin-1", "utf-8")
+
+        # And, of course, it would be in UTF-8, not Unicode.
+        expected = expected.encode("utf-8")
+
+        # Ta-da!
+        self.assertEqual(result, expected)
+
+    def test_real_shift_jis_document(self):
+        # Smoke test to make sure the parser can handle a document in
+        # Shift-JIS encoding, without choking.
+        shift_jis_html = (
+            b'<html><head></head><body><pre>'
+            b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
+            b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
+            b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B'
+            b'</pre></body></html>')
+        unicode_html = shift_jis_html.decode("shift-jis")
+        soup = self.soup(unicode_html)
+
+        # Make sure the parse tree is correctly encoded to various
+        # encodings.
+        self.assertEqual(soup.encode("utf-8"), unicode_html.encode("utf-8"))
+        self.assertEqual(soup.encode("euc_jp"), unicode_html.encode("euc_jp"))
+
+    def test_real_hebrew_document(self):
+        # A real-world test to make sure we can convert ISO-8859-8 (a
+        # Hebrew encoding) to UTF-8.
+        hebrew_document = b'<html><head><title>Hebrew (ISO 8859-8) in Visual Directionality</title></head><body><h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\xed\xe5\xec\xf9</body></html>'
+        soup = self.soup(
+            hebrew_document, from_encoding="iso8859-8")
+        self.assertEqual(soup.original_encoding, 'iso8859-8')
+        self.assertEqual(
+            soup.encode('utf-8'),
+            hebrew_document.decode("iso8859-8").encode("utf-8"))
+
+    def test_meta_tag_reflects_current_encoding(self):
+        # Here's the <meta> tag saying that a document is
+        # encoded in Shift-JIS.
+        meta_tag = ('<meta content="text/html; charset=x-sjis" '
+                    'http-equiv="Content-type"/>')
+
+        # Here's a document incorporating that meta tag.
+        shift_jis_html = (
+            '<html><head>\n%s\n'
+            '<meta http-equiv="Content-language" content="ja"/>'
+            '</head><body>Shift-JIS markup goes here.') % meta_tag
+        soup = self.soup(shift_jis_html)
+
+        # Parse the document, and the charset is seemingly unaffected.
+        parsed_meta = soup.find('meta', {'http-equiv': 'Content-type'})
+        content = parsed_meta['content']
+        self.assertEqual('text/html; charset=x-sjis', content)
+
+        # But that value is actually a ContentMetaAttributeValue object.
+        self.assertTrue(isinstance(content, ContentMetaAttributeValue))
+
+        # And it will take on a value that reflects its current
+        # encoding.
+        self.assertEqual('text/html; charset=utf8', content.encode("utf8"))
+
+        # For the rest of the story, see TestSubstitutions in
+        # test_tree.py.
+
+    def test_html5_style_meta_tag_reflects_current_encoding(self):
+        # Here's the <meta> tag saying that a document is
+        # encoded in Shift-JIS.
+        meta_tag = ('<meta id="encoding" charset="x-sjis" />')
+
+        # Here's a document incorporating that meta tag.
+        shift_jis_html = (
+            '<html><head>\n%s\n'
+            '<meta http-equiv="Content-language" content="ja"/>'
+            '</head><body>Shift-JIS markup goes here.') % meta_tag
+        soup = self.soup(shift_jis_html)
+
+        # Parse the document, and the charset is seemingly unaffected.
+        parsed_meta = soup.find('meta', id="encoding")
+        charset = parsed_meta['charset']
+        self.assertEqual('x-sjis', charset)
+
+        # But that value is actually a CharsetMetaAttributeValue object.
+        self.assertTrue(isinstance(charset, CharsetMetaAttributeValue))
+
+        # And it will take on a value that reflects its current
+        # encoding.
+        self.assertEqual('utf8', charset.encode("utf8"))
+
+    def test_tag_with_no_attributes_can_have_attributes_added(self):
+        data = self.soup("<a>text</a>")
+        data.a['foo'] = 'bar'
+        self.assertEqual('<a foo="bar">text</a>', data.a.decode())
+
+class XMLTreeBuilderSmokeTest(object):
+
+    def test_docstring_generated(self):
+        soup = self.soup("<root/>")
+        self.assertEqual(
+            soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
+
+    def test_real_xhtml_document(self):
+        """A real XHTML document should come out *exactly* the same as it went in."""
+        markup = b"""<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Hello.</title></head>
+<body>Goodbye.</body>
+</html>"""
+        soup = self.soup(markup)
+        self.assertEqual(
+            soup.encode("utf-8"), markup)
+
+
+    def test_docstring_includes_correct_encoding(self):
+        soup = self.soup("<root/>")
+        self.assertEqual(
+            soup.encode("latin1"),
+            b'<?xml version="1.0" encoding="latin1"?>\n<root/>')
+
+    def test_large_xml_document(self):
+        """A large XML document should come out the same as it went in."""
+        markup = (b'<?xml version="1.0" encoding="utf-8"?>\n<root>'
+                  + b'0' * (2**12)
+                  + b'</root>')
+        soup = self.soup(markup)
+        self.assertEqual(soup.encode("utf-8"), markup)
+
+
+    def test_tags_are_empty_element_if_and_only_if_they_are_empty(self):
+        self.assertSoupEquals("<p>", "<p/>")
+        self.assertSoupEquals("<p>foo</p>")
+
+    def test_namespaces_are_preserved(self):
+        markup = '<root xmlns:a="http://example.com/" xmlns:b="http://example.net/"><a:foo>This tag is in the a namespace</a:foo><b:foo>This tag is in the b namespace</b:foo></root>'
+        soup = self.soup(markup)
+        root = soup.root
+        self.assertEqual("http://example.com/", root['xmlns:a'])
+        self.assertEqual("http://example.net/", root['xmlns:b'])
+
+
+class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
+    """Smoke test for a tree builder that supports HTML5."""
+
+    def test_real_xhtml_document(self):
+        # Since XHTML is not HTML5, HTML5 parsers are not tested to handle
+        # XHTML documents in any particular way.
+        pass
+
+    def test_html_tags_have_namespace(self):
+        markup = "<a>"
+        soup = self.soup(markup)
+        self.assertEqual("http://www.w3.org/1999/xhtml", soup.a.namespace)
+
+    def test_svg_tags_have_namespace(self):
+        markup = '<svg><circle/></svg>'
+        soup = self.soup(markup)
+        namespace = "http://www.w3.org/2000/svg"
+        self.assertEqual(namespace, soup.svg.namespace)
+        self.assertEqual(namespace, soup.circle.namespace)
+
+
+    def test_mathml_tags_have_namespace(self):
+        markup = '<math><msqrt>5</msqrt></math>'
+        soup = self.soup(markup)
+        namespace = 'http://www.w3.org/1998/Math/MathML'
+        self.assertEqual(namespace, soup.math.namespace)
+        self.assertEqual(namespace, soup.msqrt.namespace)
+
+
+def skipIf(condition, reason):
+   def nothing(test, *args, **kwargs):
+       return None
+
+   def decorator(test_item):
+       if condition:
+           return nothing
+       else:
+           return test_item
+
+   return decorator
diff --git a/lib/bs4/tests/__init__.py b/lib/bs4/tests/__init__.py
new file mode 100644
index 00000000..142c8cc3
--- /dev/null
+++ b/lib/bs4/tests/__init__.py
@@ -0,0 +1 @@
+"The beautifulsoup tests."
diff --git a/lib/bs4/tests/test_builder_registry.py b/lib/bs4/tests/test_builder_registry.py
new file mode 100644
index 00000000..92ad10fb
--- /dev/null
+++ b/lib/bs4/tests/test_builder_registry.py
@@ -0,0 +1,141 @@
+"""Tests of the builder registry."""
+
+import unittest
+
+from bs4 import BeautifulSoup
+from bs4.builder import (
+    builder_registry as registry,
+    HTMLParserTreeBuilder,
+    TreeBuilderRegistry,
+)
+
+try:
+    from bs4.builder import HTML5TreeBuilder
+    HTML5LIB_PRESENT = True
+except ImportError:
+    HTML5LIB_PRESENT = False
+
+try:
+    from bs4.builder import (
+        LXMLTreeBuilderForXML,
+        LXMLTreeBuilder,
+        )
+    LXML_PRESENT = True
+except ImportError:
+    LXML_PRESENT = False
+
+
+class BuiltInRegistryTest(unittest.TestCase):
+    """Test the built-in registry with the default builders registered."""
+
+    def test_combination(self):
+        if LXML_PRESENT:
+            self.assertEqual(registry.lookup('fast', 'html'),
+                             LXMLTreeBuilder)
+
+        if LXML_PRESENT:
+            self.assertEqual(registry.lookup('permissive', 'xml'),
+                             LXMLTreeBuilderForXML)
+        self.assertEqual(registry.lookup('strict', 'html'),
+                          HTMLParserTreeBuilder)
+        if HTML5LIB_PRESENT:
+            self.assertEqual(registry.lookup('html5lib', 'html'),
+                              HTML5TreeBuilder)
+
+    def test_lookup_by_markup_type(self):
+        if LXML_PRESENT:
+            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
+            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
+        else:
+            self.assertEqual(registry.lookup('xml'), None)
+            if HTML5LIB_PRESENT:
+                self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
+            else:
+                self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
+
+    def test_named_library(self):
+        if LXML_PRESENT:
+            self.assertEqual(registry.lookup('lxml', 'xml'),
+                             LXMLTreeBuilderForXML)
+            self.assertEqual(registry.lookup('lxml', 'html'),
+                             LXMLTreeBuilder)
+        if HTML5LIB_PRESENT:
+            self.assertEqual(registry.lookup('html5lib'),
+                              HTML5TreeBuilder)
+
+        self.assertEqual(registry.lookup('html.parser'),
+                          HTMLParserTreeBuilder)
+
+    def test_beautifulsoup_constructor_does_lookup(self):
+        # You can pass in a string.
+        BeautifulSoup("", features="html")
+        # Or a list of strings.
+        BeautifulSoup("", features=["html", "fast"])
+
+        # You'll get an exception if BS can't find an appropriate
+        # builder.
+        self.assertRaises(ValueError, BeautifulSoup,
+                          "", features="no-such-feature")
+
+class RegistryTest(unittest.TestCase):
+    """Test the TreeBuilderRegistry class in general."""
+
+    def setUp(self):
+        self.registry = TreeBuilderRegistry()
+
+    def builder_for_features(self, *feature_list):
+        cls = type('Builder_' + '_'.join(feature_list),
+                   (object,), {'features' : feature_list})
+
+        self.registry.register(cls)
+        return cls
+
+    def test_register_with_no_features(self):
+        builder = self.builder_for_features()
+
+        # Since the builder advertises no features, you can't find it
+        # by looking up features.
+        self.assertEqual(self.registry.lookup('foo'), None)
+
+        # But you can find it by doing a lookup with no features, if
+        # this happens to be the only registered builder.
+        self.assertEqual(self.registry.lookup(), builder)
+
+    def test_register_with_features_makes_lookup_succeed(self):
+        builder = self.builder_for_features('foo', 'bar')
+        self.assertEqual(self.registry.lookup('foo'), builder)
+        self.assertEqual(self.registry.lookup('bar'), builder)
+
+    def test_lookup_fails_when_no_builder_implements_feature(self):
+        builder = self.builder_for_features('foo', 'bar')
+        self.assertEqual(self.registry.lookup('baz'), None)
+
+    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
+        builder1 = self.builder_for_features('foo')
+        builder2 = self.builder_for_features('bar')
+        self.assertEqual(self.registry.lookup(), builder2)
+
+    def test_lookup_fails_when_no_tree_builders_registered(self):
+        self.assertEqual(self.registry.lookup(), None)
+
+    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
+        has_one = self.builder_for_features('foo')
+        has_the_other = self.builder_for_features('bar')
+        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
+        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
+        lacks_one = self.builder_for_features('bar')
+        has_the_other = self.builder_for_features('foo')
+
+        # There are two builders featuring 'foo' and 'bar', but
+        # the one that also features 'quux' was registered later.
+        self.assertEqual(self.registry.lookup('foo', 'bar'),
+                          has_both_late)
+
+        # There is only one builder featuring 'foo', 'bar', and 'baz'.
+        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
+                          has_both_early)
+
+    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
+        builder1 = self.builder_for_features('foo', 'bar')
+        builder2 = self.builder_for_features('foo', 'baz')
+        self.assertEqual(self.registry.lookup('bar', 'baz'), None)
diff --git a/lib/bs4/tests/test_docs.py b/lib/bs4/tests/test_docs.py
new file mode 100644
index 00000000..5b9f6770
--- /dev/null
+++ b/lib/bs4/tests/test_docs.py
@@ -0,0 +1,36 @@
+"Test harness for doctests."
+
+# pylint: disable-msg=E0611,W0142
+
+__metaclass__ = type
+__all__ = [
+    'additional_tests',
+    ]
+
+import atexit
+import doctest
+import os
+#from pkg_resources import (
+#    resource_filename, resource_exists, resource_listdir, cleanup_resources)
+import unittest
+
+DOCTEST_FLAGS = (
+    doctest.ELLIPSIS |
+    doctest.NORMALIZE_WHITESPACE |
+    doctest.REPORT_NDIFF)
+
+
+# def additional_tests():
+#     "Run the doc tests (README.txt and docs/*, if any exist)"
+#     doctest_files = [
+#         os.path.abspath(resource_filename('bs4', 'README.txt'))]
+#     if resource_exists('bs4', 'docs'):
+#         for name in resource_listdir('bs4', 'docs'):
+#             if name.endswith('.txt'):
+#                 doctest_files.append(
+#                     os.path.abspath(
+#                         resource_filename('bs4', 'docs/%s' % name)))
+#     kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
+#     atexit.register(cleanup_resources)
+#     return unittest.TestSuite((
+#         doctest.DocFileSuite(*doctest_files, **kwargs)))
diff --git a/lib/bs4/tests/test_html5lib.py b/lib/bs4/tests/test_html5lib.py
new file mode 100644
index 00000000..f195f7d0
--- /dev/null
+++ b/lib/bs4/tests/test_html5lib.py
@@ -0,0 +1,58 @@
+"""Tests to ensure that the html5lib tree builder generates good trees."""
+
+import warnings
+
+try:
+    from bs4.builder import HTML5TreeBuilder
+    HTML5LIB_PRESENT = True
+except ImportError:
+    HTML5LIB_PRESENT = False
+from bs4.element import SoupStrainer
+from bs4.testing import (
+    HTML5TreeBuilderSmokeTest,
+    SoupTest,
+    skipIf,
+)
+
+@skipIf(
+    not HTML5LIB_PRESENT,
+    "html5lib seems not to be present, not testing its tree builder.")
+class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
+    """See ``HTML5TreeBuilderSmokeTest``."""
+
+    @property
+    def default_builder(self):
+        return HTML5TreeBuilder()
+
+    def test_soupstrainer(self):
+        # The html5lib tree builder does not support SoupStrainers.
+        strainer = SoupStrainer("b")
+        markup = "<p>A <b>bold</b> statement.</p>"
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup(markup, parse_only=strainer)
+        self.assertEqual(
+            soup.decode(), self.document_for(markup))
+
+        self.assertTrue(
+            "the html5lib tree builder doesn't support parse_only" in
+            str(w[0].message))
+
+    def test_correctly_nested_tables(self):
+        """html5lib inserts <tbody> tags where other parsers don't."""
+        markup = ('<table id="1">'
+                  '<tr>'
+                  "<td>Here's another table:"
+                  '<table id="2">'
+                  '<tr><td>foo</td></tr>'
+                  '</table></td>')
+
+        self.assertSoupEquals(
+            markup,
+            '<table id="1"><tbody><tr><td>Here\'s another table:'
+            '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
+            '</td></tr></tbody></table>')
+
+        self.assertSoupEquals(
+            "<table><thead><tr><td>Foo</td></tr></thead>"
+            "<tbody><tr><td>Bar</td></tr></tbody>"
+            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
diff --git a/lib/bs4/tests/test_htmlparser.py b/lib/bs4/tests/test_htmlparser.py
new file mode 100644
index 00000000..bcb5ed23
--- /dev/null
+++ b/lib/bs4/tests/test_htmlparser.py
@@ -0,0 +1,19 @@
+"""Tests to ensure that the html.parser tree builder generates good
+trees."""
+
+from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
+from bs4.builder import HTMLParserTreeBuilder
+
+class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
+
+    @property
+    def default_builder(self):
+        return HTMLParserTreeBuilder()
+
+    def test_namespaced_system_doctype(self):
+        # html.parser can't handle namespaced doctypes, so skip this one.
+        pass
+
+    def test_namespaced_public_doctype(self):
+        # html.parser can't handle namespaced doctypes, so skip this one.
+        pass
diff --git a/lib/bs4/tests/test_lxml.py b/lib/bs4/tests/test_lxml.py
new file mode 100644
index 00000000..39e26bfb
--- /dev/null
+++ b/lib/bs4/tests/test_lxml.py
@@ -0,0 +1,75 @@
+"""Tests to ensure that the lxml tree builder generates good trees."""
+
+import re
+import warnings
+
+try:
+    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
+    LXML_PRESENT = True
+except ImportError, e:
+    LXML_PRESENT = False
+
+from bs4 import (
+    BeautifulSoup,
+    BeautifulStoneSoup,
+    )
+from bs4.element import Comment, Doctype, SoupStrainer
+from bs4.testing import skipIf
+from bs4.tests import test_htmlparser
+from bs4.testing import (
+    HTMLTreeBuilderSmokeTest,
+    XMLTreeBuilderSmokeTest,
+    SoupTest,
+    skipIf,
+)
+
+@skipIf(
+    not LXML_PRESENT,
+    "lxml seems not to be present, not testing its tree builder.")
+class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
+    """See ``HTMLTreeBuilderSmokeTest``."""
+
+    @property
+    def default_builder(self):
+        return LXMLTreeBuilder()
+
+    def test_out_of_range_entity(self):
+        self.assertSoupEquals(
+            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
+        self.assertSoupEquals(
+            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
+        self.assertSoupEquals(
+            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
+
+    def test_beautifulstonesoup_is_xml_parser(self):
+        # Make sure that the deprecated BSS class uses an xml builder
+        # if one is installed.
+        with warnings.catch_warnings(record=False) as w:
+            soup = BeautifulStoneSoup("<b />")
+            self.assertEqual(u"<b/>", unicode(soup.b))
+
+    def test_real_xhtml_document(self):
+        """lxml strips the XML definition from an XHTML doc, which is fine."""
+        markup = b"""<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head><title>Hello.</title></head>
+<body>Goodbye.</body>
+</html>"""
+        soup = self.soup(markup)
+        self.assertEqual(
+            soup.encode("utf-8").replace(b"\n", b''),
+            markup.replace(b'\n', b'').replace(
+                b'<?xml version="1.0" encoding="utf-8"?>', b''))
+
+
+@skipIf(
+    not LXML_PRESENT,
+    "lxml seems not to be present, not testing its XML tree builder.")
+class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
+    """See ``XMLTreeBuilderSmokeTest``."""
+
+    @property
+    def default_builder(self):
+        return LXMLTreeBuilderForXML()
+
diff --git a/lib/bs4/tests/test_soup.py b/lib/bs4/tests/test_soup.py
new file mode 100644
index 00000000..23a664e7
--- /dev/null
+++ b/lib/bs4/tests/test_soup.py
@@ -0,0 +1,368 @@
+# -*- coding: utf-8 -*-
+"""Tests of Beautiful Soup as a whole."""
+
+import unittest
+from bs4 import (
+    BeautifulSoup,
+    BeautifulStoneSoup,
+)
+from bs4.element import (
+    CharsetMetaAttributeValue,
+    ContentMetaAttributeValue,
+    SoupStrainer,
+    NamespacedAttribute,
+    )
+import bs4.dammit
+from bs4.dammit import EntitySubstitution, UnicodeDammit
+from bs4.testing import (
+    SoupTest,
+    skipIf,
+)
+import warnings
+
+try:
+    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
+    LXML_PRESENT = True
+except ImportError, e:
+    LXML_PRESENT = False
+
+class TestDeprecatedConstructorArguments(SoupTest):
+
+    def test_parseOnlyThese_renamed_to_parse_only(self):
+        with warnings.catch_warnings(record=True) as w:
+            soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
+        msg = str(w[0].message)
+        self.assertTrue("parseOnlyThese" in msg)
+        self.assertTrue("parse_only" in msg)
+        self.assertEqual(b"<b></b>", soup.encode())
+
+    def test_fromEncoding_renamed_to_from_encoding(self):
+        with warnings.catch_warnings(record=True) as w:
+            utf8 = b"\xc3\xa9"
+            soup = self.soup(utf8, fromEncoding="utf8")
+        msg = str(w[0].message)
+        self.assertTrue("fromEncoding" in msg)
+        self.assertTrue("from_encoding" in msg)
+        self.assertEqual("utf8", soup.original_encoding)
+
+    def test_unrecognized_keyword_argument(self):
+        self.assertRaises(
+            TypeError, self.soup, "<a>", no_such_argument=True)
+
+    @skipIf(
+        not LXML_PRESENT,
+        "lxml not present, not testing BeautifulStoneSoup.")
+    def test_beautifulstonesoup(self):
+        with warnings.catch_warnings(record=True) as w:
+            soup = BeautifulStoneSoup("<markup>")
+            self.assertTrue(isinstance(soup, BeautifulSoup))
+            self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
+
+class TestSelectiveParsing(SoupTest):
+
+    def test_parse_with_soupstrainer(self):
+        markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
+        strainer = SoupStrainer("b")
+        soup = self.soup(markup, parse_only=strainer)
+        self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
+
+
+class TestEntitySubstitution(unittest.TestCase):
+    """Standalone tests of the EntitySubstitution class."""
+    def setUp(self):
+        self.sub = EntitySubstitution
+
+    def test_simple_html_substitution(self):
+        # Unicode characters corresponding to named HTML entities
+        # are substituted, and no others.
+        s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
+        self.assertEqual(self.sub.substitute_html(s),
+                          u"foo&forall;\N{SNOWMAN}&otilde;bar")
+
+    def test_smart_quote_substitution(self):
+        # MS smart quotes are a common source of frustration, so we
+        # give them a special test.
+        quotes = b"\x91\x92foo\x93\x94"
+        dammit = UnicodeDammit(quotes)
+        self.assertEqual(self.sub.substitute_html(dammit.markup),
+                          "&lsquo;&rsquo;foo&ldquo;&rdquo;")
+
+    def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
+        s = 'Welcome to "my bar"'
+        self.assertEqual(self.sub.substitute_xml(s, False), s)
+
+    def test_xml_attribute_quoting_normally_uses_double_quotes(self):
+        self.assertEqual(self.sub.substitute_xml("Welcome", True),
+                          '"Welcome"')
+        self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
+                          '"Bob\'s Bar"')
+
+    def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
+        s = 'Welcome to "my bar"'
+        self.assertEqual(self.sub.substitute_xml(s, True),
+                          "'Welcome to \"my bar\"'")
+
+    def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
+        s = 'Welcome to "Bob\'s Bar"'
+        self.assertEqual(
+            self.sub.substitute_xml(s, True),
+            '"Welcome to &quot;Bob\'s Bar&quot;"')
+
+    def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
+        quoted = 'Welcome to "Bob\'s Bar"'
+        self.assertEqual(self.sub.substitute_xml(quoted), quoted)
+
+    def test_xml_quoting_handles_angle_brackets(self):
+        self.assertEqual(
+            self.sub.substitute_xml("foo<bar>"),
+            "foo&lt;bar&gt;")
+
+    def test_xml_quoting_handles_ampersands(self):
+        self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")
+
+    def test_xml_quoting_ignores_ampersands_when_they_are_part_of_an_entity(self):
+        self.assertEqual(
+            self.sub.substitute_xml("&Aacute;T&T"),
+            "&Aacute;T&amp;T")
+
+    def test_quotes_not_html_substituted(self):
+        """There's no need to do this except inside attribute values."""
+        text = 'Bob\'s "bar"'
+        self.assertEqual(self.sub.substitute_html(text), text)
+
+
+class TestEncodingConversion(SoupTest):
+    # Test Beautiful Soup's ability to decode and encode from various
+    # encodings.
+
+    def setUp(self):
+        super(TestEncodingConversion, self).setUp()
+        self.unicode_data = u"<html><head></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>"
+        self.utf8_data = self.unicode_data.encode("utf-8")
+        # Just so you know what it looks like.
+        self.assertEqual(
+            self.utf8_data,
+            b"<html><head></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>")
+
+    def test_ascii_in_unicode_out(self):
+        # ASCII input is converted to Unicode. The original_encoding
+        # attribute is set.
+        ascii = b"<foo>a</foo>"
+        soup_from_ascii = self.soup(ascii)
+        unicode_output = soup_from_ascii.decode()
+        self.assertTrue(isinstance(unicode_output, unicode))
+        self.assertEqual(unicode_output, self.document_for(ascii.decode()))
+        self.assertEqual(soup_from_ascii.original_encoding, "ascii")
+
+    def test_unicode_in_unicode_out(self):
+        # Unicode input is left alone. The original_encoding attribute
+        # is not set.
+        soup_from_unicode = self.soup(self.unicode_data)
+        self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
+        self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
+        self.assertEqual(soup_from_unicode.original_encoding, None)
+
+    def test_utf8_in_unicode_out(self):
+        # UTF-8 input is converted to Unicode. The original_encoding
+        # attribute is set.
+        soup_from_utf8 = self.soup(self.utf8_data)
+        self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
+        self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
+
+    def test_utf8_out(self):
+        # The internal data structures can be encoded as UTF-8.
+        soup_from_unicode = self.soup(self.unicode_data)
+        self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
+
+
+class TestUnicodeDammit(unittest.TestCase):
+    """Standalone tests of Unicode, Dammit."""
+
+    def test_smart_quotes_to_unicode(self):
+        markup = b"<foo>\x91\x92\x93\x94</foo>"
+        dammit = UnicodeDammit(markup)
+        self.assertEqual(
+            dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
+
+    def test_smart_quotes_to_xml_entities(self):
+        markup = b"<foo>\x91\x92\x93\x94</foo>"
+        dammit = UnicodeDammit(markup, smart_quotes_to="xml")
+        self.assertEqual(
+            dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
+
+    def test_smart_quotes_to_html_entities(self):
+        markup = b"<foo>\x91\x92\x93\x94</foo>"
+        dammit = UnicodeDammit(markup, smart_quotes_to="html")
+        self.assertEqual(
+            dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
+
+    def test_smart_quotes_to_ascii(self):
+        markup = b"<foo>\x91\x92\x93\x94</foo>"
+        dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
+        self.assertEqual(
+            dammit.unicode_markup, """<foo>''""</foo>""")
+
+    def test_detect_utf8(self):
+        utf8 = b"\xc3\xa9"
+        dammit = UnicodeDammit(utf8)
+        self.assertEqual(dammit.unicode_markup, u'\xe9')
+        self.assertEqual(dammit.original_encoding, 'utf-8')
+
+    def test_convert_hebrew(self):
+        hebrew = b"\xed\xe5\xec\xf9"
+        dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
+        self.assertEqual(dammit.original_encoding, 'iso-8859-8')
+        self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
+
+    def test_dont_see_smart_quotes_where_there_are_none(self):
+        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
+        dammit = UnicodeDammit(utf_8)
+        self.assertEqual(dammit.original_encoding, 'utf-8')
+        self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
+
+    def test_ignore_inappropriate_codecs(self):
+        utf8_data = u"Räksmörgås".encode("utf-8")
+        dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
+        self.assertEqual(dammit.original_encoding, 'utf-8')
+
+    def test_ignore_invalid_codecs(self):
+        utf8_data = u"Räksmörgås".encode("utf-8")
+        for bad_encoding in ['.utf8', '...', 'utF---16.!']:
+            dammit = UnicodeDammit(utf8_data, [bad_encoding])
+            self.assertEqual(dammit.original_encoding, 'utf-8')
+
+    def test_detect_html5_style_meta_tag(self):
+
+        for data in (
+            b'<html><meta charset="euc-jp" /></html>',
+            b"<html><meta charset='euc-jp' /></html>",
+            b"<html><meta charset=euc-jp /></html>",
+            b"<html><meta charset=euc-jp/></html>"):
+            dammit = UnicodeDammit(data, is_html=True)
+            self.assertEqual(
+                "euc-jp", dammit.original_encoding)
+
+    def test_last_ditch_entity_replacement(self):
+        # This is a UTF-8 document that contains bytestrings
+        # completely incompatible with UTF-8 (ie. encoded with some other
+        # encoding).
+        #
+        # Since there is no consistent encoding for the document,
+        # Unicode, Dammit will eventually encode the document as UTF-8
+        # and encode the incompatible characters as REPLACEMENT
+        # CHARACTER.
+        #
+        # If chardet is installed, it will detect that the document
+        # can be converted into ISO-8859-1 without errors. This happens
+        # to be the wrong encoding, but it is a consistent encoding, so the
+        # code we're testing here won't run.
+        #
+        # So we temporarily disable chardet if it's present.
+        doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
+<html><b>\330\250\330\252\330\261</b>
+<i>\310\322\321\220\312\321\355\344</i></html>"""
+        chardet = bs4.dammit.chardet
+        try:
+            bs4.dammit.chardet = None
+            with warnings.catch_warnings(record=True) as w:
+                dammit = UnicodeDammit(doc)
+                self.assertEqual(True, dammit.contains_replacement_characters)
+                self.assertTrue(u"\ufffd" in dammit.unicode_markup)
+
+                soup = BeautifulSoup(doc, "html.parser")
+                self.assertTrue(soup.contains_replacement_characters)
+
+                msg = w[0].message
+                self.assertTrue(isinstance(msg, UnicodeWarning))
+                self.assertTrue("Some characters could not be decoded" in str(msg))
+        finally:
+            bs4.dammit.chardet = chardet
+
+    def test_sniffed_xml_encoding(self):
+        # A document written in UTF-16LE will be converted by a different
+        # code path that sniffs the byte order markers.
+        data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
+        dammit = UnicodeDammit(data)
+        self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
+        self.assertEqual("utf-16le", dammit.original_encoding)
+
+    def test_detwingle(self):
+        # Here's a UTF8 document.
+        utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
+
+        # Here's a Windows-1252 document.
+        windows_1252 = (
+            u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
+            u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
+
+        # Through some unholy alchemy, they've been stuck together.
+        doc = utf8 + windows_1252 + utf8
+
+        # The document can't be turned into UTF-8:
+        self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
+
+        # Unicode, Dammit thinks the whole document is Windows-1252,
+        # and decodes it into "â˜ƒâ˜ƒâ˜ƒ“Hi, I like Windows!”â˜ƒâ˜ƒâ˜ƒ"
+
+        # But if we run it through UnicodeDammit.detwingle, it's fixed:
+
+        fixed = UnicodeDammit.detwingle(doc)
+        self.assertEqual(
+            u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
+
+    def test_detwingle_ignores_multibyte_characters(self):
+        # Each of these characters has a UTF-8 representation ending
+        # in \x93. \x93 is a smart quote if interpreted as
+        # Windows-1252. But our code knows to skip over multibyte
+        # UTF-8 characters, so they'll survive the process unscathed.
+        for tricky_unicode_char in (
+            u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
+            u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
+            u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
+            ):
+            input = tricky_unicode_char.encode("utf8")
+            self.assertTrue(input.endswith(b'\x93'))
+            output = UnicodeDammit.detwingle(input)
+            self.assertEqual(output, input)
+
+class TestNamedspacedAttribute(SoupTest):
+
+    def test_name_may_be_none(self):
+        a = NamespacedAttribute("xmlns", None)
+        self.assertEqual(a, "xmlns")
+
+    def test_attribute_is_equivalent_to_colon_separated_string(self):
+        a = NamespacedAttribute("a", "b")
+        self.assertEqual("a:b", a)
+
+    def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
+        a = NamespacedAttribute("a", "b", "c")
+        b = NamespacedAttribute("a", "b", "c")
+        self.assertEqual(a, b)
+
+        # The actual namespace is not considered.
+        c = NamespacedAttribute("a", "b", None)
+        self.assertEqual(a, c)
+
+        # But name and prefix are important.
+        d = NamespacedAttribute("a", "z", "c")
+        self.assertNotEqual(a, d)
+
+        e = NamespacedAttribute("z", "b", "c")
+        self.assertNotEqual(a, e)
+
+
+class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
+    """encode() substitutes the target encoding for the original charset."""
+
+    def test_charset_meta_attribute_value(self):
+        value = CharsetMetaAttributeValue("euc-jp")
+        self.assertEqual("euc-jp", value)
+        self.assertEqual("euc-jp", value.original_value)
+        self.assertEqual("utf8", value.encode("utf8"))
+
+    def test_content_meta_attribute_value(self):
+        value = ContentMetaAttributeValue("text/html; charset=euc-jp")
+        self.assertEqual("text/html; charset=euc-jp", value)
+        self.assertEqual("text/html; charset=euc-jp", value.original_value)
+        self.assertEqual("text/html; charset=utf8", value.encode("utf8"))
diff --git a/lib/bs4/tests/test_tree.py b/lib/bs4/tests/test_tree.py
new file mode 100644
index 00000000..cc573ede
--- /dev/null
+++ b/lib/bs4/tests/test_tree.py
@@ -0,0 +1,1695 @@
+# -*- coding: utf-8 -*-
+"""Tests for Beautiful Soup's tree traversal methods.
+
+The tree traversal methods are the main advantage of using Beautiful
+Soup over just using a parser.
+
+Different parsers will build different Beautiful Soup trees given the
+same markup, but all Beautiful Soup trees can be traversed with the
+methods tested here.
+"""
+
+import copy
+import pickle
+import re
+import warnings
+from bs4 import BeautifulSoup
+from bs4.builder import (
+    builder_registry,
+    HTMLParserTreeBuilder,
+)
+from bs4.element import (
+    CData,
+    Doctype,
+    NavigableString,
+    SoupStrainer,
+    Tag,
+)
+from bs4.testing import (
+    SoupTest,
+    skipIf,
+)
+
+XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
+LXML_PRESENT = (builder_registry.lookup("lxml") is not None)
+
+class TreeTest(SoupTest):
+
+    def assertSelects(self, tags, should_match):
+        """Make sure that the given tags have the correct text.
+
+        This is used in tests that define a bunch of tags, each
+        containing a single string, and then select certain strings by
+        some mechanism.
+        """
+        self.assertEqual([tag.string for tag in tags], should_match)
+
+    def assertSelectsIDs(self, tags, should_match):
+        """Make sure that the given tags have the correct IDs.
+
+        This is used in tests that define a bunch of tags, each
+        containing a single string, and then select certain strings by
+        some mechanism.
+        """
+        self.assertEqual([tag['id'] for tag in tags], should_match)
+
+
+class TestFind(TreeTest):
+    """Basic tests of the find() method.
+
+    find() just calls find_all() with limit=1, so it's not tested all
+    that thoroughly here.
+    """
+
+    def test_find_tag(self):
+        soup = self.soup("<a>1</a><b>2</b><a>3</a><b>4</b>")
+        self.assertEqual(soup.find("b").string, "2")
+
+    def test_unicode_text_find(self):
+        soup = self.soup(u'<h1>Räksmörgås</h1>')
+        self.assertEqual(soup.find(text=u'Räksmörgås'), u'Räksmörgås')
+
+class TestFindAll(TreeTest):
+    """Basic tests of the find_all() method."""
+
+    def test_find_all_text_nodes(self):
+        """You can search the tree for text nodes."""
+        soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
+        # Exact match.
+        self.assertEqual(soup.find_all(text="bar"), [u"bar"])
+        # Match any of a number of strings.
+        self.assertEqual(
+            soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"])
+        # Match a regular expression.
+        self.assertEqual(soup.find_all(text=re.compile('.*')),
+                         [u"Foo", u"bar", u'\xbb'])
+        # Match anything.
+        self.assertEqual(soup.find_all(text=True),
+                         [u"Foo", u"bar", u'\xbb'])
+
+    def test_find_all_limit(self):
+        """You can limit the number of items returned by find_all."""
+        soup = self.soup("<a>1</a><a>2</a><a>3</a><a>4</a><a>5</a>")
+        self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"])
+        self.assertSelects(soup.find_all('a', limit=1), ["1"])
+        self.assertSelects(
+            soup.find_all('a', limit=10), ["1", "2", "3", "4", "5"])
+
+        # A limit of 0 means no limit.
+        self.assertSelects(
+            soup.find_all('a', limit=0), ["1", "2", "3", "4", "5"])
+
+    def test_calling_a_tag_is_calling_findall(self):
+        soup = self.soup("<a>1</a><b>2<a id='foo'>3</a></b>")
+        self.assertSelects(soup('a', limit=1), ["1"])
+        self.assertSelects(soup.b(id="foo"), ["3"])
+
+    def test_find_all_with_self_referential_data_structure_does_not_cause_infinite_recursion(self):
+        soup = self.soup("<a></a>")
+        # Create a self-referential list.
+        l = []
+        l.append(l)
+
+        # Without special code in _normalize_search_value, this would cause infinite
+        # recursion.
+        self.assertEqual([], soup.find_all(l))
+
+class TestFindAllBasicNamespaces(TreeTest):
+
+    def test_find_by_namespaced_name(self):
+        soup = self.soup('<mathml:msqrt>4</mathml:msqrt><a svg:fill="red">')
+        self.assertEqual("4", soup.find("mathml:msqrt").string)
+        self.assertEqual("a", soup.find(attrs= { "svg:fill" : "red" }).name)
+
+
+class TestFindAllByName(TreeTest):
+    """Test ways of finding tags by tag name."""
+
+    def setUp(self):
+        super(TestFindAllByName, self).setUp()
+        self.tree =  self.soup("""<a>First tag.</a>
+                                  <b>Second tag.</b>
+                                  <c>Third <a>Nested tag.</a> tag.</c>""")
+
+    def test_find_all_by_tag_name(self):
+        # Find all the <a> tags.
+        self.assertSelects(
+            self.tree.find_all('a'), ['First tag.', 'Nested tag.'])
+
+    def test_find_all_by_name_and_text(self):
+        self.assertSelects(
+            self.tree.find_all('a', text='First tag.'), ['First tag.'])
+
+        self.assertSelects(
+            self.tree.find_all('a', text=True), ['First tag.', 'Nested tag.'])
+
+        self.assertSelects(
+            self.tree.find_all('a', text=re.compile("tag")),
+            ['First tag.', 'Nested tag.'])
+
+
+    def test_find_all_on_non_root_element(self):
+        # You can call find_all on any node, not just the root.
+        self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.'])
+
+    def test_calling_element_invokes_find_all(self):
+        self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.'])
+
+    def test_find_all_by_tag_strainer(self):
+        self.assertSelects(
+            self.tree.find_all(SoupStrainer('a')),
+            ['First tag.', 'Nested tag.'])
+
+    def test_find_all_by_tag_names(self):
+        self.assertSelects(
+            self.tree.find_all(['a', 'b']),
+            ['First tag.', 'Second tag.', 'Nested tag.'])
+
+    def test_find_all_by_tag_dict(self):
+        self.assertSelects(
+            self.tree.find_all({'a' : True, 'b' : True}),
+            ['First tag.', 'Second tag.', 'Nested tag.'])
+
+    def test_find_all_by_tag_re(self):
+        self.assertSelects(
+            self.tree.find_all(re.compile('^[ab]$')),
+            ['First tag.', 'Second tag.', 'Nested tag.'])
+
+    def test_find_all_with_tags_matching_method(self):
+        # You can define an oracle method that determines whether
+        # a tag matches the search.
+        def id_matches_name(tag):
+            return tag.name == tag.get('id')
+
+        tree = self.soup("""<a id="a">Match 1.</a>
+                            <a id="1">Does not match.</a>
+                            <b id="b">Match 2.</a>""")
+
+        self.assertSelects(
+            tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
+
+
+class TestFindAllByAttribute(TreeTest):
+
+    def test_find_all_by_attribute_name(self):
+        # You can pass in keyword arguments to find_all to search by
+        # attribute.
+        tree = self.soup("""
+                         <a id="first">Matching a.</a>
+                         <a id="second">
+                          Non-matching <b id="first">Matching b.</b>a.
+                         </a>""")
+        self.assertSelects(tree.find_all(id='first'),
+                           ["Matching a.", "Matching b."])
+
+    def test_find_all_by_utf8_attribute_value(self):
+        peace = u"םולש".encode("utf8")
+        data = u'<a title="םולש"></a>'.encode("utf8")
+        soup = self.soup(data)
+        self.assertEqual([soup.a], soup.find_all(title=peace))
+        self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
+        self.assertEqual([soup.a], soup.find_all(title=[peace, "something else"]))
+
+    def test_find_all_by_attribute_dict(self):
+        # You can pass in a dictionary as the argument 'attrs'. This
+        # lets you search for attributes like 'name' (a fixed argument
+        # to find_all) and 'class' (a reserved word in Python.)
+        tree = self.soup("""
+                         <a name="name1" class="class1">Name match.</a>
+                         <a name="name2" class="class2">Class match.</a>
+                         <a name="name3" class="class3">Non-match.</a>
+                         <name1>A tag called 'name1'.</name1>
+                         """)
+
+        # This doesn't do what you want.
+        self.assertSelects(tree.find_all(name='name1'),
+                           ["A tag called 'name1'."])
+        # This does what you want.
+        self.assertSelects(tree.find_all(attrs={'name' : 'name1'}),
+                           ["Name match."])
+
+        # Passing class='class2' would cause a syntax error.
+        self.assertSelects(tree.find_all(attrs={'class' : 'class2'}),
+                           ["Class match."])
+
+    def test_find_all_by_class(self):
+        # Passing in a string to 'attrs' will search the CSS class.
+        tree = self.soup("""
+                         <a class="1">Class 1.</a>
+                         <a class="2">Class 2.</a>
+                         <b class="1">Class 1.</b>
+                         <c class="3 4">Class 3 and 4.</c>
+                         """)
+        self.assertSelects(tree.find_all('a', '1'), ['Class 1.'])
+        self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.'])
+        self.assertSelects(tree.find_all('c', '3'), ['Class 3 and 4.'])
+        self.assertSelects(tree.find_all('c', '4'), ['Class 3 and 4.'])
+
+    def test_find_by_class_when_multiple_classes_present(self):
+        tree = self.soup("<gar class='foo bar'>Found it</gar>")
+
+        attrs = { 'class' : re.compile("o") }
+        f = tree.find_all("gar", attrs=attrs)
+        self.assertSelects(f, ["Found it"])
+
+        f = tree.find_all("gar", re.compile("a"))
+        self.assertSelects(f, ["Found it"])
+
+        # Since the class is not the string "foo bar", but the two
+        # strings "foo" and "bar", this will not find anything.
+        attrs = { 'class' : re.compile("o b") }
+        f = tree.find_all("gar", attrs=attrs)
+        self.assertSelects(f, [])
+
+    def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
+        soup = self.soup("<a class='bar'>Found it</a>")
+
+        self.assertSelects(soup.find_all("a", re.compile("ba")), ["Found it"])
+
+        def big_attribute_value(value):
+            return len(value) > 3
+
+        self.assertSelects(soup.find_all("a", big_attribute_value), [])
+
+        def small_attribute_value(value):
+            return len(value) <= 3
+
+        self.assertSelects(
+            soup.find_all("a", small_attribute_value), ["Found it"])
+
+    def test_find_all_with_string_for_attrs_finds_multiple_classes(self):
+        soup = self.soup('<a class="foo bar"></a><a class="foo"></a>')
+        a, a2 = soup.find_all("a")
+        self.assertEqual([a, a2], soup.find_all("a", "foo"))
+        self.assertEqual([a], soup.find_all("a", "bar"))
+
+        # If you specify the attribute as a string that contains a
+        # space, only that specific value will be found.
+        self.assertEqual([a], soup.find_all("a", "foo bar"))
+        self.assertEqual([], soup.find_all("a", "bar foo"))
+
+    def test_find_all_by_attribute_soupstrainer(self):
+        tree = self.soup("""
+                         <a id="first">Match.</a>
+                         <a id="second">Non-match.</a>""")
+
+        strainer = SoupStrainer(attrs={'id' : 'first'})
+        self.assertSelects(tree.find_all(strainer), ['Match.'])
+
+    def test_find_all_with_missing_atribute(self):
+        # You can pass in None as the value of an attribute to find_all.
+        # This will match tags that do not have that attribute set.
+        tree = self.soup("""<a id="1">ID present.</a>
+                            <a>No ID present.</a>
+                            <a id="">ID is empty.</a>""")
+        self.assertSelects(tree.find_all('a', id=None), ["No ID present."])
+
+    def test_find_all_with_defined_attribute(self):
+        # You can pass in True as the value of an attribute to find_all.
+        # This will match tags that have that attribute set to any value.
+        tree = self.soup("""<a id="1">ID present.</a>
+                            <a>No ID present.</a>
+                            <a id="">ID is empty.</a>""")
+        self.assertSelects(
+            tree.find_all(id=True), ["ID present.", "ID is empty."])
+
+    def test_find_all_with_numeric_attribute(self):
+        # If you search for a number, it's treated as a string.
+        tree = self.soup("""<a id=1>Unquoted attribute.</a>
+                            <a id="1">Quoted attribute.</a>""")
+
+        expected = ["Unquoted attribute.", "Quoted attribute."]
+        self.assertSelects(tree.find_all(id=1), expected)
+        self.assertSelects(tree.find_all(id="1"), expected)
+
+    def test_find_all_with_list_attribute_values(self):
+        # You can pass a list of attribute values instead of just one,
+        # and you'll get tags that match any of the values.
+        tree = self.soup("""<a id="1">1</a>
+                            <a id="2">2</a>
+                            <a id="3">3</a>
+                            <a>No ID.</a>""")
+        self.assertSelects(tree.find_all(id=["1", "3", "4"]),
+                           ["1", "3"])
+
+    def test_find_all_with_regular_expression_attribute_value(self):
+        # You can pass a regular expression as an attribute value, and
+        # you'll get tags whose values for that attribute match the
+        # regular expression.
+        tree = self.soup("""<a id="a">One a.</a>
+                            <a id="aa">Two as.</a>
+                            <a id="ab">Mixed as and bs.</a>
+                            <a id="b">One b.</a>
+                            <a>No ID.</a>""")
+
+        self.assertSelects(tree.find_all(id=re.compile("^a+$")),
+                           ["One a.", "Two as."])
+
+    def test_find_by_name_and_containing_string(self):
+        soup = self.soup("<b>foo</b><b>bar</b><a>foo</a>")
+        a = soup.a
+
+        self.assertEqual([a], soup.find_all("a", text="foo"))
+        self.assertEqual([], soup.find_all("a", text="bar"))
+        self.assertEqual([], soup.find_all("a", text="bar"))
+
+    def test_find_by_name_and_containing_string_when_string_is_buried(self):
+        soup = self.soup("<a>foo</a><a><b><c>foo</c></b></a>")
+        self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo"))
+
+    def test_find_by_attribute_and_containing_string(self):
+        soup = self.soup('<b id="1">foo</b><a id="2">foo</a>')
+        a = soup.a
+
+        self.assertEqual([a], soup.find_all(id=2, text="foo"))
+        self.assertEqual([], soup.find_all(id=1, text="bar"))
+
+
+
+
+class TestIndex(TreeTest):
+    """Test Tag.index"""
+    def test_index(self):
+        tree = self.soup("""<div>
+                            <a>Identical</a>
+                            <b>Not identical</b>
+                            <a>Identical</a>
+
+                            <c><d>Identical with child</d></c>
+                            <b>Also not identical</b>
+                            <c><d>Identical with child</d></c>
+                            </div>""")
+        div = tree.div
+        for i, element in enumerate(div.contents):
+            self.assertEqual(i, div.index(element))
+        self.assertRaises(ValueError, tree.index, 1)
+
+
+class TestParentOperations(TreeTest):
+    """Test navigation and searching through an element's parents."""
+
+    def setUp(self):
+        super(TestParentOperations, self).setUp()
+        self.tree = self.soup('''<ul id="empty"></ul>
+                                 <ul id="top">
+                                  <ul id="middle">
+                                   <ul id="bottom">
+                                    <b>Start here</b>
+                                   </ul>
+                                  </ul>''')
+        self.start = self.tree.b
+
+
+    def test_parent(self):
+        self.assertEqual(self.start.parent['id'], 'bottom')
+        self.assertEqual(self.start.parent.parent['id'], 'middle')
+        self.assertEqual(self.start.parent.parent.parent['id'], 'top')
+
+    def test_parent_of_top_tag_is_soup_object(self):
+        top_tag = self.tree.contents[0]
+        self.assertEqual(top_tag.parent, self.tree)
+
+    def test_soup_object_has_no_parent(self):
+        self.assertEqual(None, self.tree.parent)
+
+    def test_find_parents(self):
+        self.assertSelectsIDs(
+            self.start.find_parents('ul'), ['bottom', 'middle', 'top'])
+        self.assertSelectsIDs(
+            self.start.find_parents('ul', id="middle"), ['middle'])
+
+    def test_find_parent(self):
+        self.assertEqual(self.start.find_parent('ul')['id'], 'bottom')
+
+    def test_parent_of_text_element(self):
+        text = self.tree.find(text="Start here")
+        self.assertEqual(text.parent.name, 'b')
+
+    def test_text_element_find_parent(self):
+        text = self.tree.find(text="Start here")
+        self.assertEqual(text.find_parent('ul')['id'], 'bottom')
+
+    def test_parent_generator(self):
+        parents = [parent['id'] for parent in self.start.parents
+                   if parent is not None and 'id' in parent.attrs]
+        self.assertEqual(parents, ['bottom', 'middle', 'top'])
+
+
+class ProximityTest(TreeTest):
+    """Shared fixture for next/previous element tests: three sibling <b> tags."""
+    def setUp(self):
+        super(ProximityTest, self).setUp()  # name this class so no setUp in the MRO is skipped
+        self.tree = self.soup(
+            '<html id="start"><head></head><body><b id="1">One</b><b id="2">Two</b><b id="3">Three</b></body></html>')
+
+
+class TestNextOperations(ProximityTest):
+
+    def setUp(self):
+        super(TestNextOperations, self).setUp()
+        self.start = self.tree.b
+
+    def test_next(self):
+        self.assertEqual(self.start.next_element, "One")
+        self.assertEqual(self.start.next_element.next_element['id'], "2")
+
+    def test_next_of_last_item_is_none(self):
+        last = self.tree.find(text="Three")
+        self.assertEqual(last.next_element, None)
+
+    def test_next_of_root_is_none(self):
+        # The document root is outside the next/previous chain.
+        self.assertEqual(self.tree.next_element, None)
+
+    def test_find_all_next(self):
+        self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
+        self.start.find_all_next(id=3)
+        self.assertSelects(self.start.find_all_next(id=3), ["Three"])
+
+    def test_find_next(self):
+        self.assertEqual(self.start.find_next('b')['id'], '2')
+        self.assertEqual(self.start.find_next(text="Three"), "Three")
+
+    def test_find_next_for_text_element(self):
+        text = self.tree.find(text="One")
+        self.assertEqual(text.find_next("b").string, "Two")
+        self.assertSelects(text.find_all_next("b"), ["Two", "Three"])
+
+    def test_next_generator(self):
+        start = self.tree.find(text="Two")
+        successors = [node for node in start.next_elements]
+        # There are two successors: the final <b> tag and its text contents.
+        tag, contents = successors
+        self.assertEqual(tag['id'], '3')
+        self.assertEqual(contents, "Three")
+
+class TestPreviousOperations(ProximityTest):
+
+    def setUp(self):
+        super(TestPreviousOperations, self).setUp()
+        self.end = self.tree.find(text="Three")
+
+    def test_previous(self):
+        self.assertEqual(self.end.previous_element['id'], "3")
+        self.assertEqual(self.end.previous_element.previous_element, "Two")
+
+    def test_previous_of_first_item_is_none(self):
+        first = self.tree.find('html')
+        self.assertEqual(first.previous_element, None)
+
+    def test_previous_of_root_is_none(self):
+        # The document root is outside the next/previous chain.
+        # XXX This is broken!
+        #self.assertEqual(self.tree.previous_element, None)
+        pass
+
+    def test_find_all_previous(self):
+        # The <b> tag containing the "Three" node is the predecessor
+        # of the "Three" node itself, which is why "Three" shows up
+        # here.
+        self.assertSelects(
+            self.end.find_all_previous('b'), ["Three", "Two", "One"])
+        self.assertSelects(self.end.find_all_previous(id=1), ["One"])
+
+    def test_find_previous(self):
+        self.assertEqual(self.end.find_previous('b')['id'], '3')
+        self.assertEqual(self.end.find_previous(text="One"), "One")
+
+    def test_find_previous_for_text_element(self):
+        text = self.tree.find(text="Three")
+        self.assertEqual(text.find_previous("b").string, "Three")
+        self.assertSelects(
+            text.find_all_previous("b"), ["Three", "Two", "One"])
+
+    def test_previous_generator(self):
+        start = self.tree.find(text="One")
+        predecessors = [node for node in start.previous_elements]
+
+        # There are four predecessors: the <b> tag containing "One",
+        # the <body> tag, the <head> tag, and the <html> tag.
+        b, body, head, html = predecessors
+        self.assertEqual(b['id'], '1')
+        self.assertEqual(body.name, "body")
+        self.assertEqual(head.name, "head")
+        self.assertEqual(html.name, "html")
+
+
+class SiblingTest(TreeTest):
+    """Shared fixture for sibling tests: four sibling <span> tags, three with a nested child."""
+    def setUp(self):
+        super(SiblingTest, self).setUp()
+        markup = '''<html>
+                    <span id="1">
+                     <span id="1.1"></span>
+                    </span>
+                    <span id="2">
+                     <span id="2.1"></span>
+                    </span>
+                    <span id="3">
+                     <span id="3.1"></span>
+                    </span>
+                    <span id="4"></span>
+                    </html>'''
+        # All that whitespace looks good but makes the tests more
+        # difficult. Get rid of it.
+        markup = re.compile(r"\n\s*").sub("", markup)
+        self.tree = self.soup(markup)
+
+
+class TestNextSibling(SiblingTest):
+
+    def setUp(self):
+        super(TestNextSibling, self).setUp()
+        self.start = self.tree.find(id="1")
+
+    def test_next_sibling_of_root_is_none(self):
+        self.assertEqual(self.tree.next_sibling, None)
+
+    def test_next_sibling(self):
+        self.assertEqual(self.start.next_sibling['id'], '2')
+        self.assertEqual(self.start.next_sibling.next_sibling['id'], '3')
+
+        # Note the difference between next_sibling and next_element.
+        self.assertEqual(self.start.next_element['id'], '1.1')
+
+    def test_next_sibling_may_not_exist(self):
+        self.assertEqual(self.tree.html.next_sibling, None)
+
+        nested_span = self.tree.find(id="1.1")
+        self.assertEqual(nested_span.next_sibling, None)
+
+        last_span = self.tree.find(id="4")
+        self.assertEqual(last_span.next_sibling, None)
+
+    def test_find_next_sibling(self):
+        self.assertEqual(self.start.find_next_sibling('span')['id'], '2')
+
+    def test_next_siblings(self):
+        self.assertSelectsIDs(self.start.find_next_siblings("span"),
+                              ['2', '3', '4'])
+
+        self.assertSelectsIDs(self.start.find_next_siblings(id='3'), ['3'])
+
+    def test_next_sibling_for_text_element(self):
+        soup = self.soup("Foo<b>bar</b>baz")
+        start = soup.find(text="Foo")
+        self.assertEqual(start.next_sibling.name, 'b')
+        self.assertEqual(start.next_sibling.next_sibling, 'baz')
+
+        self.assertSelects(start.find_next_siblings('b'), ['bar'])
+        self.assertEqual(start.find_next_sibling(text="baz"), "baz")
+        self.assertEqual(start.find_next_sibling(text="nonesuch"), None)
+
+
+class TestPreviousSibling(SiblingTest):
+
+    def setUp(self):
+        super(TestPreviousSibling, self).setUp()
+        self.end = self.tree.find(id="4")
+
+    def test_previous_sibling_of_root_is_none(self):
+        self.assertEqual(self.tree.previous_sibling, None)
+
+    def test_previous_sibling(self):
+        self.assertEqual(self.end.previous_sibling['id'], '3')
+        self.assertEqual(self.end.previous_sibling.previous_sibling['id'], '2')
+
+        # Note the difference between previous_sibling and previous_element.
+        self.assertEqual(self.end.previous_element['id'], '3.1')
+
+    def test_previous_sibling_may_not_exist(self):
+        self.assertEqual(self.tree.html.previous_sibling, None)
+
+        nested_span = self.tree.find(id="1.1")
+        self.assertEqual(nested_span.previous_sibling, None)
+
+        first_span = self.tree.find(id="1")
+        self.assertEqual(first_span.previous_sibling, None)
+
+    def test_find_previous_sibling(self):
+        self.assertEqual(self.end.find_previous_sibling('span')['id'], '3')
+
+    def test_previous_siblings(self):
+        self.assertSelectsIDs(self.end.find_previous_siblings("span"),
+                              ['3', '2', '1'])
+
+        self.assertSelectsIDs(self.end.find_previous_siblings(id='1'), ['1'])
+
+    def test_previous_sibling_for_text_element(self):
+        soup = self.soup("Foo<b>bar</b>baz")
+        start = soup.find(text="baz")
+        self.assertEqual(start.previous_sibling.name, 'b')
+        self.assertEqual(start.previous_sibling.previous_sibling, 'Foo')
+
+        self.assertSelects(start.find_previous_siblings('b'), ['bar'])
+        self.assertEqual(start.find_previous_sibling(text="Foo"), "Foo")
+        self.assertEqual(start.find_previous_sibling(text="nonesuch"), None)
+
+
+class TestTagCreation(SoupTest):
+    """Test the ability to create new tags."""
+    def test_new_tag(self):
+        soup = self.soup("")
+        new_tag = soup.new_tag("foo", bar="baz")
+        self.assertTrue(isinstance(new_tag, Tag))
+        self.assertEqual("foo", new_tag.name)
+        self.assertEqual(dict(bar="baz"), new_tag.attrs)
+        self.assertEqual(None, new_tag.parent)
+
+    def test_tag_inherits_self_closing_rules_from_builder(self):
+        if XML_BUILDER_PRESENT:
+            xml_soup = BeautifulSoup("", "xml")
+            xml_br = xml_soup.new_tag("br")
+            xml_p = xml_soup.new_tag("p")
+
+            # Both the <br> and <p> tag are empty-element, just because
+            # they have no contents.
+            self.assertEqual(b"<br/>", xml_br.encode())
+            self.assertEqual(b"<p/>", xml_p.encode())
+
+        html_soup = BeautifulSoup("", "html")
+        html_br = html_soup.new_tag("br")
+        html_p = html_soup.new_tag("p")
+
+        # The HTML builder uses HTML's rules about which tags are
+        # empty-element tags, and the new tags reflect these rules.
+        self.assertEqual(b"<br/>", html_br.encode())
+        self.assertEqual(b"<p></p>", html_p.encode())
+
+    def test_new_string_creates_navigablestring(self):
+        soup = self.soup("")
+        s = soup.new_string("foo")
+        self.assertEqual("foo", s)
+        self.assertTrue(isinstance(s, NavigableString))
+
+class TestTreeModification(SoupTest):
+
+    def test_attribute_modification(self):
+        soup = self.soup('<a id="1"></a>')
+        soup.a['id'] = 2
+        self.assertEqual(soup.decode(), self.document_for('<a id="2"></a>'))
+        del(soup.a['id'])
+        self.assertEqual(soup.decode(), self.document_for('<a></a>'))
+        soup.a['id2'] = 'foo'
+        self.assertEqual(soup.decode(), self.document_for('<a id2="foo"></a>'))
+
+    def test_new_tag_creation(self):
+        builder = builder_registry.lookup('html')()
+        soup = self.soup("<body></body>", builder=builder)
+        a = Tag(soup, builder, 'a')
+        ol = Tag(soup, builder, 'ol')
+        a['href'] = 'http://foo.com/'
+        soup.body.insert(0, a)
+        soup.body.insert(1, ol)
+        self.assertEqual(
+            soup.body.encode(),
+            b'<body><a href="http://foo.com/"></a><ol></ol></body>')
+
+    def test_append_to_contents_moves_tag(self):
+        doc = """<p id="1">Don't leave me <b>here</b>.</p>
+                <p id="2">Don\'t leave!</p>"""
+        soup = self.soup(doc)
+        second_para = soup.find(id='2')
+        bold = soup.b
+
+        # Move the <b> tag to the end of the second paragraph.
+        soup.find(id='2').append(soup.b)
+
+        # The <b> tag is now a child of the second paragraph.
+        self.assertEqual(bold.parent, second_para)
+
+        self.assertEqual(
+            soup.decode(), self.document_for(
+                '<p id="1">Don\'t leave me .</p>\n'
+                '<p id="2">Don\'t leave!<b>here</b></p>'))
+
+    def test_replace_with_returns_thing_that_was_replaced(self):
+        text = "<a></a><b><c></c></b>"
+        soup = self.soup(text)
+        a = soup.a
+        new_a = a.replace_with(soup.c)
+        self.assertEqual(a, new_a)
+
+    def test_unwrap_returns_thing_that_was_replaced(self):
+        text = "<a><b></b><c></c></a>"
+        soup = self.soup(text)
+        a = soup.a
+        new_a = a.unwrap()
+        self.assertEqual(a, new_a)
+
+    def test_replace_tag_with_itself(self):
+        text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>"
+        soup = self.soup(text)
+        c = soup.c
+        soup.c.replace_with(c)
+        self.assertEqual(soup.decode(), self.document_for(text))
+
+    def test_replace_tag_with_its_parent_raises_exception(self):
+        text = "<a><b></b></a>"
+        soup = self.soup(text)
+        self.assertRaises(ValueError, soup.b.replace_with, soup.a)
+
+    def test_insert_tag_into_itself_raises_exception(self):
+        text = "<a><b></b></a>"
+        soup = self.soup(text)
+        self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
+
+    def test_replace_with_maintains_next_element_throughout(self):
+        soup = self.soup('<p><a>one</a><b>three</b></p>')
+        a = soup.a
+        b = a.contents[0]
+        # Make it so the <a> tag has two text children.
+        a.insert(1, "two")
+
+        # Now replace each one with the empty string.
+        left, right = a.contents
+        left.replaceWith('')
+        right.replaceWith('')
+
+        # The <b> tag is still connected to the tree.
+        self.assertEqual("three", soup.b.string)
+
+    def test_replace_final_node(self):
+        soup = self.soup("<b>Argh!</b>")
+        soup.find(text="Argh!").replace_with("Hooray!")
+        new_text = soup.find(text="Hooray!")
+        b = soup.b
+        self.assertEqual(new_text.previous_element, b)
+        self.assertEqual(new_text.parent, b)
+        self.assertEqual(new_text.previous_element.next_element, new_text)
+        self.assertEqual(new_text.next_element, None)
+
+    def test_consecutive_text_nodes(self):
+        # A builder should never create two consecutive text nodes,
+        # but if you insert one next to another, Beautiful Soup will
+        # handle it correctly.
+        soup = self.soup("<a><b>Argh!</b><c></c></a>")
+        soup.b.insert(1, "Hooray!")
+
+        self.assertEqual(
+            soup.decode(), self.document_for(
+                "<a><b>Argh!Hooray!</b><c></c></a>"))
+
+        new_text = soup.find(text="Hooray!")
+        self.assertEqual(new_text.previous_element, "Argh!")
+        self.assertEqual(new_text.previous_element.next_element, new_text)
+
+        self.assertEqual(new_text.previous_sibling, "Argh!")
+        self.assertEqual(new_text.previous_sibling.next_sibling, new_text)
+
+        self.assertEqual(new_text.next_sibling, None)
+        self.assertEqual(new_text.next_element, soup.c)
+
+    def test_insert_string(self):
+        soup = self.soup("<a></a>")
+        soup.a.insert(0, "bar")
+        soup.a.insert(0, "foo")
+        # The strings were added to the tag.
+        self.assertEqual(["foo", "bar"], soup.a.contents)
+        # And they were converted to NavigableStrings.
+        self.assertEqual(soup.a.contents[0].next_element, "bar")
+
+    def test_insert_tag(self):
+        builder = self.default_builder
+        soup = self.soup(
+            "<a><b>Find</b><c>lady!</c><d></d></a>", builder=builder)
+        magic_tag = Tag(soup, builder, 'magictag')
+        magic_tag.insert(0, "the")
+        soup.a.insert(1, magic_tag)
+
+        self.assertEqual(
+            soup.decode(), self.document_for(
+                "<a><b>Find</b><magictag>the</magictag><c>lady!</c><d></d></a>"))
+
+        # Make sure all the relationships are hooked up correctly.
+        b_tag = soup.b
+        self.assertEqual(b_tag.next_sibling, magic_tag)
+        self.assertEqual(magic_tag.previous_sibling, b_tag)
+
+        find = b_tag.find(text="Find")
+        self.assertEqual(find.next_element, magic_tag)
+        self.assertEqual(magic_tag.previous_element, find)
+
+        c_tag = soup.c
+        self.assertEqual(magic_tag.next_sibling, c_tag)
+        self.assertEqual(c_tag.previous_sibling, magic_tag)
+
+        the = magic_tag.find(text="the")
+        self.assertEqual(the.parent, magic_tag)
+        self.assertEqual(the.next_element, c_tag)
+        self.assertEqual(c_tag.previous_element, the)
+
+    def test_append_child_thats_already_at_the_end(self):
+        data = "<a><b></b></a>"
+        soup = self.soup(data)
+        soup.a.append(soup.b)
+        self.assertEqual(data, soup.decode())
+
+    def test_move_tag_to_beginning_of_parent(self):
+        data = "<a><b></b><c></c><d></d></a>"
+        soup = self.soup(data)
+        soup.a.insert(0, soup.d)
+        self.assertEqual("<a><d></d><b></b><c></c></a>", soup.decode())
+
+    def test_insert_works_on_empty_element_tag(self):
+        # This is a little strange, since most HTML parsers don't allow
+        # markup like this to come through. But in general, we don't
+        # know what the parser would or wouldn't have allowed, so
+        # I'm letting this succeed for now.
+        soup = self.soup("<br/>")
+        soup.br.insert(1, "Contents")
+        self.assertEqual(str(soup.br), "<br>Contents</br>")
+
+    def test_insert_before(self):
+        soup = self.soup("<a>foo</a><b>bar</b>")
+        soup.b.insert_before("BAZ")
+        soup.a.insert_before("QUUX")
+        self.assertEqual(
+            soup.decode(), self.document_for("QUUX<a>foo</a>BAZ<b>bar</b>"))
+
+        soup.a.insert_before(soup.b)
+        self.assertEqual(
+            soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
+
+    def test_insert_after(self):
+        soup = self.soup("<a>foo</a><b>bar</b>")
+        soup.b.insert_after("BAZ")
+        soup.a.insert_after("QUUX")
+        self.assertEqual(
+            soup.decode(), self.document_for("<a>foo</a>QUUX<b>bar</b>BAZ"))
+        soup.b.insert_after(soup.a)
+        self.assertEqual(
+            soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
+
+    def test_insert_after_raises_valueerror_if_after_has_no_meaning(self):
+        soup = self.soup("")
+        tag = soup.new_tag("a")
+        string = soup.new_string("")
+        self.assertRaises(ValueError, string.insert_after, tag)
+        self.assertRaises(ValueError, soup.insert_after, tag)
+        self.assertRaises(ValueError, tag.insert_after, tag)
+
+    def test_insert_before_raises_valueerror_if_before_has_no_meaning(self):
+        soup = self.soup("")
+        tag = soup.new_tag("a")
+        string = soup.new_string("")
+        self.assertRaises(ValueError, string.insert_before, tag)
+        self.assertRaises(ValueError, soup.insert_before, tag)
+        self.assertRaises(ValueError, tag.insert_before, tag)
+
+    def test_replace_with(self):
+        soup = self.soup(
+                "<p>There's <b>no</b> business like <b>show</b> business</p>")
+        no, show = soup.find_all('b')
+        show.replace_with(no)
+        self.assertEqual(
+            soup.decode(),
+            self.document_for(
+                "<p>There's  business like <b>no</b> business</p>"))
+
+        self.assertEqual(show.parent, None)
+        self.assertEqual(no.parent, soup.p)
+        self.assertEqual(no.next_element, "no")
+        self.assertEqual(no.next_sibling, " business")
+
+    def test_replace_first_child(self):
+        data = "<a><b></b><c></c></a>"
+        soup = self.soup(data)
+        soup.b.replace_with(soup.c)
+        self.assertEqual("<a><c></c></a>", soup.decode())
+
+    def test_replace_last_child(self):
+        data = "<a><b></b><c></c></a>"
+        soup = self.soup(data)
+        soup.c.replace_with(soup.b)
+        self.assertEqual("<a><b></b></a>", soup.decode())
+
+    def test_nested_tag_replace_with(self):
+        soup = self.soup(
+            """<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""")
+
+        # Replace the entire <b> tag and its contents ("reserve the
+        # right") with the <f> tag ("refuse").
+        remove_tag = soup.b
+        move_tag = soup.f
+        remove_tag.replace_with(move_tag)
+
+        self.assertEqual(
+            soup.decode(), self.document_for(
+                "<a>We<f>refuse</f></a><e>to<g>service</g></e>"))
+
+        # The <b> tag is now an orphan.
+        self.assertEqual(remove_tag.parent, None)
+        self.assertEqual(remove_tag.find(text="right").next_element, None)
+        self.assertEqual(remove_tag.previous_element, None)
+        self.assertEqual(remove_tag.next_sibling, None)
+        self.assertEqual(remove_tag.previous_sibling, None)
+
+        # The <f> tag is now connected to the <a> tag.
+        self.assertEqual(move_tag.parent, soup.a)
+        self.assertEqual(move_tag.previous_element, "We")
+        self.assertEqual(move_tag.next_element.next_element, soup.e)
+        self.assertEqual(move_tag.next_sibling, None)
+
+        # The gap where the <f> tag used to be has been mended, and
+        # the word "to" is now connected to the <g> tag.
+        to_text = soup.find(text="to")
+        g_tag = soup.g
+        self.assertEqual(to_text.next_element, g_tag)
+        self.assertEqual(to_text.next_sibling, g_tag)
+        self.assertEqual(g_tag.previous_element, to_text)
+        self.assertEqual(g_tag.previous_sibling, to_text)
+
+    def test_unwrap(self):
+        tree = self.soup("""
+            <p>Unneeded <em>formatting</em> is unneeded</p>
+            """)
+        tree.em.unwrap()
+        self.assertEqual(tree.em, None)
+        self.assertEqual(tree.p.text, "Unneeded formatting is unneeded")
+
+    def test_wrap(self):
+        soup = self.soup("I wish I was bold.")
+        value = soup.string.wrap(soup.new_tag("b"))
+        self.assertEqual(value.decode(), "<b>I wish I was bold.</b>")
+        self.assertEqual(
+            soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
+
+    def test_wrap_extracts_tag_from_elsewhere(self):
+        soup = self.soup("<b></b>I wish I was bold.")
+        soup.b.next_sibling.wrap(soup.b)
+        self.assertEqual(
+            soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
+
+    def test_wrap_puts_new_contents_at_the_end(self):
+        soup = self.soup("<b>I like being bold.</b>I wish I was bold.")
+        soup.b.next_sibling.wrap(soup.b)
+        self.assertEqual(2, len(soup.b.contents))
+        self.assertEqual(
+            soup.decode(), self.document_for(
+                "<b>I like being bold.I wish I was bold.</b>"))
+
+    def test_extract(self):
+        soup = self.soup(
+            '<html><body>Some content. <div id="nav">Nav crap</div> More content.</body></html>')
+
+        self.assertEqual(len(soup.body.contents), 3)
+        extracted = soup.find(id="nav").extract()
+
+        self.assertEqual(
+            soup.decode(), "<html><body>Some content.  More content.</body></html>")
+        self.assertEqual(extracted.decode(), '<div id="nav">Nav crap</div>')
+
+        # The extracted tag is now an orphan.
+        self.assertEqual(len(soup.body.contents), 2)
+        self.assertEqual(extracted.parent, None)
+        self.assertEqual(extracted.previous_element, None)
+        self.assertEqual(extracted.next_element.next_element, None)
+
+        # The gap where the extracted tag used to be has been mended.
+        content_1 = soup.find(text="Some content. ")
+        content_2 = soup.find(text=" More content.")
+        self.assertEqual(content_1.next_element, content_2)
+        self.assertEqual(content_1.next_sibling, content_2)
+        self.assertEqual(content_2.previous_element, content_1)
+        self.assertEqual(content_2.previous_sibling, content_1)
+
+    def test_extract_distinguishes_between_identical_strings(self):
+        soup = self.soup("<a>foo</a><b>bar</b>")
+        foo_1 = soup.a.string
+        bar_1 = soup.b.string
+        foo_2 = soup.new_string("foo")
+        bar_2 = soup.new_string("bar")
+        soup.a.append(foo_2)
+        soup.b.append(bar_2)
+
+        # Now there are two identical strings in the <a> tag, and two
+        # in the <b> tag. Let's remove the first "foo" and the second
+        # "bar".
+        foo_1.extract()
+        bar_2.extract()
+        self.assertEqual(foo_2, soup.a.string)
+        self.assertEqual(bar_2, soup.b.string)
+
+    def test_clear(self):
+        """Tag.clear()"""
+        soup = self.soup("<p><a>String <em>Italicized</em></a> and another</p>")
+        # clear using extract()
+        a = soup.a
+        soup.p.clear()
+        self.assertEqual(len(soup.p.contents), 0)
+        self.assertTrue(hasattr(a, "contents"))
+
+        # clear using decompose()
+        em = a.em
+        a.clear(decompose=True)
+        self.assertFalse(hasattr(em, "contents"))
+
+    def test_string_set(self):
+        """Tag.string = 'string'"""
+        soup = self.soup("<a></a> <b><c></c></b>")
+        soup.a.string = "foo"
+        self.assertEqual(soup.a.contents, ["foo"])
+        soup.b.string = "bar"
+        self.assertEqual(soup.b.contents, ["bar"])
+
+    def test_string_set_does_not_affect_original_string(self):
+        soup = self.soup("<a><b>foo</b><c>bar</c>")
+        soup.b.string = soup.c.string
+        self.assertEqual(soup.a.encode(), b"<a><b>bar</b><c>bar</c></a>")
+
+    def test_set_string_preserves_class_of_string(self):
+        soup = self.soup("<a></a>")
+        cdata = CData("foo")
+        soup.a.string = cdata
+        self.assertTrue(isinstance(soup.a.string, CData))
+
+class TestElementObjects(SoupTest):
+    """Test various features of element objects."""
+
+    def test_len(self):
+        """The length of an element is its number of children."""
+        soup = self.soup("<top>1<b>2</b>3</top>")
+
+        # The BeautifulSoup object itself contains one element: the
+        # <top> tag.
+        self.assertEqual(len(soup.contents), 1)
+        self.assertEqual(len(soup), 1)
+
+        # The <top> tag contains three elements: the text node "1", the
+        # <b> tag, and the text node "3".
+        self.assertEqual(len(soup.top), 3)
+        self.assertEqual(len(soup.top.contents), 3)
+
+    def test_member_access_invokes_find(self):
+        """Accessing a Python member .foo invokes find('foo')"""
+        soup = self.soup('<b><i></i></b>')
+        self.assertEqual(soup.b, soup.find('b'))
+        self.assertEqual(soup.b.i, soup.find('b').find('i'))
+        self.assertEqual(soup.a, None)
+
+    def test_deprecated_member_access(self):
+        soup = self.soup('<b><i></i></b>')
+        with warnings.catch_warnings(record=True) as w:
+            tag = soup.bTag
+        self.assertEqual(soup.b, tag)
+        self.assertEqual(
+            '.bTag is deprecated, use .find("b") instead.',
+            str(w[0].message))
+
+    def test_has_attr(self):
+        """has_attr() checks for the presence of an attribute.
+
+        Please note: has_attr() is different from
+        __contains__. has_attr() checks the tag's attributes and
+        __contains__ checks the tag's children.
+        """
+        soup = self.soup("<foo attr='bar'>")
+        self.assertTrue(soup.foo.has_attr('attr'))
+        self.assertFalse(soup.foo.has_attr('attr2'))
+
+
+    def test_attributes_come_out_in_alphabetical_order(self):
+        markup = '<b a="1" z="5" m="3" f="2" y="4"></b>'
+        self.assertSoupEquals(markup, '<b a="1" f="2" m="3" y="4" z="5"></b>')
+
+    def test_string(self):
+        # A tag that contains only a text node makes that node
+        # available as .string.
+        soup = self.soup("<b>foo</b>")
+        self.assertEqual(soup.b.string, 'foo')
+
+    def test_empty_tag_has_no_string(self):
+        # A tag with no children has no .string.
+        soup = self.soup("<b></b>")
+        self.assertEqual(soup.b.string, None)
+
+    def test_tag_with_multiple_children_has_no_string(self):
+        # A tag with multiple children has no .string.
+        soup = self.soup("<a>foo<b></b><b></b></b>")
+        self.assertEqual(soup.b.string, None)
+
+        soup = self.soup("<a>foo<b></b>bar</b>")
+        self.assertEqual(soup.b.string, None)
+
+        # Even if all the children are strings, due to trickery,
+        # it won't work--but this would be a good optimization.
+        soup = self.soup("<a>foo</b>")
+        soup.a.insert(1, "bar")
+        self.assertEqual(soup.a.string, None)
+
+    def test_tag_with_recursive_string_has_string(self):
+        # A tag with a single child which has a .string inherits that
+        # .string.
+        soup = self.soup("<a><b>foo</b></a>")
+        self.assertEqual(soup.a.string, "foo")
+        self.assertEqual(soup.string, "foo")
+
+    def test_lack_of_string(self):
+        """Only a tag containing a single text node has a .string."""
+        soup = self.soup("<b>f<i>e</i>o</b>")
+        self.assertFalse(soup.b.string)
+
+        soup = self.soup("<b></b>")
+        self.assertFalse(soup.b.string)
+
+    def test_all_text(self):
+        """Tag.text and Tag.get_text(sep=u"") -> all child text, concatenated"""
+        soup = self.soup("<a>a<b>r</b>   <r> t </r></a>")
+        self.assertEqual(soup.a.text, "ar  t ")
+        self.assertEqual(soup.a.get_text(strip=True), "art")
+        self.assertEqual(soup.a.get_text(","), "a,r, , t ")
+        self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")
+
+class TestCDAtaListAttributes(SoupTest):
+
+    """Testing cdata-list attributes like 'class'.
+    """
+    def test_single_value_becomes_list(self):
+        soup = self.soup("<a class='foo'>")
+        self.assertEqual(["foo"],soup.a['class'])
+
+    def test_multiple_values_becomes_list(self):
+        soup = self.soup("<a class='foo bar'>")
+        self.assertEqual(["foo", "bar"], soup.a['class'])
+
+    def test_multiple_values_separated_by_weird_whitespace(self):
+        soup = self.soup("<a class='foo\tbar\nbaz'>")
+        self.assertEqual(["foo", "bar", "baz"],soup.a['class'])
+
+    def test_attributes_joined_into_string_on_output(self):
+        soup = self.soup("<a class='foo\tbar'>")
+        self.assertEqual(b'<a class="foo bar"></a>', soup.a.encode())
+
+    def test_accept_charset(self):
+        soup = self.soup('<form accept-charset="ISO-8859-1 UTF-8">')
+        self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset'])
+
+    def test_cdata_attribute_applying_only_to_one_tag(self):
+        data = '<a accept-charset="ISO-8859-1 UTF-8"></a>'
+        soup = self.soup(data)
+        # We saw in another test that accept-charset is a cdata-list
+        # attribute for the <form> tag. But it's not a cdata-list
+        # attribute for any other tag.
+        self.assertEqual('ISO-8859-1 UTF-8', soup.a['accept-charset'])
+
+
+class TestPersistence(SoupTest):
+    "Testing features like pickle and deepcopy."
+
+    def setUp(self):
+        super(TestPersistence, self).setUp()
+        self.page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
+"http://www.w3.org/TR/REC-html40/transitional.dtd">
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>Beautiful Soup: We called him Tortoise because he taught us.</title>
+<link rev="made" href="mailto:leonardr@segfault.org">
+<meta name="Description" content="Beautiful Soup: an HTML parser optimized for screen-scraping.">
+<meta name="generator" content="Markov Approximation 1.4 (module: leonardr)">
+<meta name="author" content="Leonard Richardson">
+</head>
+<body>
+<a href="foo">foo</a>
+<a href="foo"><b>bar</b></a>
+</body>
+</html>"""
+        self.tree = self.soup(self.page)
+
+    def test_pickle_and_unpickle_identity(self):
+        # Pickling a tree, then unpickling it, yields a tree identical
+        # to the original.
+        dumped = pickle.dumps(self.tree, 2)
+        loaded = pickle.loads(dumped)
+        self.assertEqual(loaded.__class__, BeautifulSoup)
+        self.assertEqual(loaded.decode(), self.tree.decode())
+
+    def test_deepcopy_identity(self):
+        # Making a deepcopy of a tree yields an identical tree.
+        copied = copy.deepcopy(self.tree)
+        self.assertEqual(copied.decode(), self.tree.decode())
+
+    def test_unicode_pickle(self):
+        # A tree containing Unicode characters can be pickled.
+        html = u"<b>\N{SNOWMAN}</b>"
+        soup = self.soup(html)
+        dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
+        loaded = pickle.loads(dumped)
+        self.assertEqual(loaded.decode(), soup.decode())
+
+
+class TestSubstitutions(SoupTest):
+
+    def test_default_formatter_is_minimal(self):
+        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode(formatter="minimal")
+        # The < is converted back into &lt; but the e-with-acute is left alone.
+        self.assertEqual(
+            decoded,
+            self.document_for(
+                u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
+
+    def test_formatter_html(self):
+        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode(formatter="html")
+        self.assertEqual(
+            decoded,
+            self.document_for("<b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
+
+    def test_formatter_minimal(self):
+        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode(formatter="minimal")
+        # The < is converted back into &lt; but the e-with-acute is left alone.
+        self.assertEqual(
+            decoded,
+            self.document_for(
+                u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
+
+    def test_formatter_null(self):
+        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode(formatter=None)
+        # Neither the angle brackets nor the e-with-acute are converted.
+        # This is not valid HTML, but it's what the user wanted.
+        self.assertEqual(decoded,
+                          self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
+
+    def test_formatter_custom(self):
+        markup = u"<b>&lt;foo&gt;</b><b>bar</b>"
+        soup = self.soup(markup)
+        decoded = soup.decode(formatter = lambda x: x.upper())
+        # Instead of normal entity conversion code, the custom
+        # callable is called on every string.
+        self.assertEqual(
+            decoded,
+            self.document_for(u"<b><FOO></b><b>BAR</b>"))
+
+    def test_formatter_is_run_on_attribute_values(self):
+        markup = u'<a href="http://a.com?a=b&c=é">e</a>'
+        soup = self.soup(markup)
+        a = soup.a
+
+        expect_minimal = u'<a href="http://a.com?a=b&amp;c=é">e</a>'
+
+        self.assertEqual(expect_minimal, a.decode())
+        self.assertEqual(expect_minimal, a.decode(formatter="minimal"))
+
+        expect_html = u'<a href="http://a.com?a=b&amp;c=&eacute;">e</a>'
+        self.assertEqual(expect_html, a.decode(formatter="html"))
+
+        self.assertEqual(markup, a.decode(formatter=None))
+        expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>'
+        self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))
+
+    def test_prettify_accepts_formatter(self):
+        soup = BeautifulSoup("<html><body>foo</body></html>")
+        pretty = soup.prettify(formatter = lambda x: x.upper())
+        self.assertTrue("FOO" in pretty)
+
+    def test_prettify_outputs_unicode_by_default(self):
+        soup = self.soup("<a></a>")
+        self.assertEqual(unicode, type(soup.prettify()))
+
+    def test_prettify_can_encode_data(self):
+        soup = self.soup("<a></a>")
+        self.assertEqual(bytes, type(soup.prettify("utf-8")))
+
+    def test_html_entity_substitution_off_by_default(self):
+        markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
+        soup = self.soup(markup)
+        encoded = soup.b.encode("utf-8")
+        self.assertEqual(encoded, markup.encode('utf-8'))
+
+    def test_encoding_substitution(self):
+        # Here's the <meta> tag saying that a document is
+        # encoded in Shift-JIS.
+        meta_tag = ('<meta content="text/html; charset=x-sjis" '
+                    'http-equiv="Content-type"/>')
+        soup = self.soup(meta_tag)
+
+        # Parse the document, and the charset apprears unchanged.
+        self.assertEqual(soup.meta['content'], 'text/html; charset=x-sjis')
+
+        # Encode the document into some encoding, and the encoding is
+        # substituted into the meta tag.
+        utf_8 = soup.encode("utf-8")
+        self.assertTrue(b"charset=utf-8" in utf_8)
+
+        euc_jp = soup.encode("euc_jp")
+        self.assertTrue(b"charset=euc_jp" in euc_jp)
+
+        shift_jis = soup.encode("shift-jis")
+        self.assertTrue(b"charset=shift-jis" in shift_jis)
+
+        utf_16_u = soup.encode("utf-16").decode("utf-16")
+        self.assertTrue("charset=utf-16" in utf_16_u)
+
+    def test_encoding_substitution_doesnt_happen_if_tag_is_strained(self):
+        markup = ('<head><meta content="text/html; charset=x-sjis" '
+                    'http-equiv="Content-type"/></head><pre>foo</pre>')
+
+        # Beautiful Soup used to try to rewrite the meta tag even if the
+        # meta tag got filtered out by the strainer. This test makes
+        # sure that doesn't happen.
+        strainer = SoupStrainer('pre')
+        soup = self.soup(markup, parse_only=strainer)
+        self.assertEqual(soup.contents[0].name, 'pre')
+
+class TestEncoding(SoupTest):
+    """Test the ability to encode objects into strings."""
+
+    def test_unicode_string_can_be_encoded(self):
+        html = u"<b>\N{SNOWMAN}</b>"
+        soup = self.soup(html)
+        self.assertEqual(soup.b.string.encode("utf-8"),
+                          u"\N{SNOWMAN}".encode("utf-8"))
+
+    def test_tag_containing_unicode_string_can_be_encoded(self):
+        html = u"<b>\N{SNOWMAN}</b>"
+        soup = self.soup(html)
+        self.assertEqual(
+            soup.b.encode("utf-8"), html.encode("utf-8"))
+
+    def test_encoding_substitutes_unrecognized_characters_by_default(self):
+        html = u"<b>\N{SNOWMAN}</b>"
+        soup = self.soup(html)
+        self.assertEqual(soup.b.encode("ascii"), b"<b>&#9731;</b>")
+
+    def test_encoding_can_be_made_strict(self):
+        html = u"<b>\N{SNOWMAN}</b>"
+        soup = self.soup(html)
+        self.assertRaises(
+            UnicodeEncodeError, soup.encode, "ascii", errors="strict")
+
+    def test_decode_contents(self):
+        html = u"<b>\N{SNOWMAN}</b>"
+        soup = self.soup(html)
+        self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents())
+
+    def test_encode_contents(self):
+        html = u"<b>\N{SNOWMAN}</b>"
+        soup = self.soup(html)
+        self.assertEqual(
+            u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
+                encoding="utf8"))
+
+    def test_deprecated_renderContents(self):
+        html = u"<b>\N{SNOWMAN}</b>"
+        soup = self.soup(html)
+        self.assertEqual(
+            u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
+
+class TestNavigableStringSubclasses(SoupTest):
+
+    def test_cdata(self):
+        # None of the current builders turn CDATA sections into CData
+        # objects, but you can create them manually.
+        soup = self.soup("")
+        cdata = CData("foo")
+        soup.insert(1, cdata)
+        self.assertEqual(str(soup), "<![CDATA[foo]]>")
+        self.assertEqual(soup.find(text="foo"), "foo")
+        self.assertEqual(soup.contents[0], "foo")
+
+    def test_cdata_is_never_formatted(self):
+        """Text inside a CData object is passed into the formatter.
+
+        But the return value is ignored.
+        """
+
+        self.count = 0
+        def increment(*args):
+            self.count += 1
+            return "BITTER FAILURE"
+
+        soup = self.soup("")
+        cdata = CData("<><><>")
+        soup.insert(1, cdata)
+        self.assertEqual(
+            b"<![CDATA[<><><>]]>", soup.encode(formatter=increment))
+        self.assertEqual(1, self.count)
+
+    def test_doctype_ends_in_newline(self):
+        # Unlike other NavigableString subclasses, a DOCTYPE always ends
+        # in a newline.
+        doctype = Doctype("foo")
+        soup = self.soup("")
+        soup.insert(1, doctype)
+        self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n")
+
+
+class TestSoupSelector(TreeTest):
+
+    HTML = """
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+"http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+<title>The title</title>
+<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
+</head>
+<body>
+
+<div id="main">
+<div id="inner">
+<h1 id="header1">An H1</h1>
+<p>Some text</p>
+<p class="onep" id="p1">Some more text</p>
+<h2 id="header2">An H2</h2>
+<p class="class1 class2 class3" id="pmulti">Another</p>
+<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
+<h2 id="header3">Another H2</h2>
+<a id="me" href="http://simonwillison.net/" rel="me">me</a>
+<span class="s1">
+<a href="#" id="s1a1">span1a1</a>
+<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
+<span class="span2">
+<a href="#" id="s2a1">span2a1</a>
+</span>
+<span class="span3"></span>
+</span>
+</div>
+<p lang="en" id="lang-en">English</p>
+<p lang="en-gb" id="lang-en-gb">English UK</p>
+<p lang="en-us" id="lang-en-us">English US</p>
+<p lang="fr" id="lang-fr">French</p>
+</div>
+
+<div id="footer">
+</div>
+"""
+
+    def setUp(self):
+        self.soup = BeautifulSoup(self.HTML)
+
+    def assertSelects(self, selector, expected_ids):
+        el_ids = [el['id'] for el in self.soup.select(selector)]
+        el_ids.sort()
+        expected_ids.sort()
+        self.assertEqual(expected_ids, el_ids,
+            "Selector %s, expected [%s], got [%s]" % (
+                selector, ', '.join(expected_ids), ', '.join(el_ids)
+            )
+        )
+
+    assertSelect = assertSelects
+
+    def assertSelectMultiple(self, *tests):
+        for selector, expected_ids in tests:
+            self.assertSelect(selector, expected_ids)
+
+    def test_one_tag_one(self):
+        els = self.soup.select('title')
+        self.assertEqual(len(els), 1)
+        self.assertEqual(els[0].name, 'title')
+        self.assertEqual(els[0].contents, [u'The title'])
+
+    def test_one_tag_many(self):
+        els = self.soup.select('div')
+        self.assertEqual(len(els), 3)
+        for div in els:
+            self.assertEqual(div.name, 'div')
+
+    def test_tag_in_tag_one(self):
+        els = self.soup.select('div div')
+        self.assertSelects('div div', ['inner'])
+
+    def test_tag_in_tag_many(self):
+        for selector in ('html div', 'html body div', 'body div'):
+            self.assertSelects(selector, ['main', 'inner', 'footer'])
+
+    def test_tag_no_match(self):
+        self.assertEqual(len(self.soup.select('del')), 0)
+
+    def test_invalid_tag(self):
+        self.assertEqual(len(self.soup.select('tag%t')), 0)
+
+    def test_header_tags(self):
+        self.assertSelectMultiple(
+            ('h1', ['header1']),
+            ('h2', ['header2', 'header3']),
+        )
+
+    def test_class_one(self):
+        for selector in ('.onep', 'p.onep', 'html p.onep'):
+            els = self.soup.select(selector)
+            self.assertEqual(len(els), 1)
+            self.assertEqual(els[0].name, 'p')
+            self.assertEqual(els[0]['class'], ['onep'])
+
+    def test_class_mismatched_tag(self):
+        els = self.soup.select('div.onep')
+        self.assertEqual(len(els), 0)
+
+    def test_one_id(self):
+        for selector in ('div#inner', '#inner', 'div div#inner'):
+            self.assertSelects(selector, ['inner'])
+
+    def test_bad_id(self):
+        els = self.soup.select('#doesnotexist')
+        self.assertEqual(len(els), 0)
+
+    def test_items_in_id(self):
+        els = self.soup.select('div#inner p')
+        self.assertEqual(len(els), 3)
+        for el in els:
+            self.assertEqual(el.name, 'p')
+        self.assertEqual(els[1]['class'], ['onep'])
+        self.assertFalse(els[0].has_key('class'))
+
+    def test_a_bunch_of_emptys(self):
+        for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
+            self.assertEqual(len(self.soup.select(selector)), 0)
+
+    def test_multi_class_support(self):
+        for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
+            '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
+            self.assertSelects(selector, ['pmulti'])
+
+    def test_multi_class_selection(self):
+        for selector in ('.class1.class3', '.class3.class2',
+                         '.class1.class2.class3'):
+            self.assertSelects(selector, ['pmulti'])
+
+    def test_child_selector(self):
+        self.assertSelects('.s1 > a', ['s1a1', 's1a2'])
+        self.assertSelects('.s1 > a span', ['s1a2s1'])
+
+    def test_attribute_equals(self):
+        self.assertSelectMultiple(
+            ('p[class="onep"]', ['p1']),
+            ('p[id="p1"]', ['p1']),
+            ('[class="onep"]', ['p1']),
+            ('[id="p1"]', ['p1']),
+            ('link[rel="stylesheet"]', ['l1']),
+            ('link[type="text/css"]', ['l1']),
+            ('link[href="blah.css"]', ['l1']),
+            ('link[href="no-blah.css"]', []),
+            ('[rel="stylesheet"]', ['l1']),
+            ('[type="text/css"]', ['l1']),
+            ('[href="blah.css"]', ['l1']),
+            ('[href="no-blah.css"]', []),
+            ('p[href="no-blah.css"]', []),
+            ('[href="no-blah.css"]', []),
+        )
+
+    def test_attribute_tilde(self):
+        self.assertSelectMultiple(
+            ('p[class~="class1"]', ['pmulti']),
+            ('p[class~="class2"]', ['pmulti']),
+            ('p[class~="class3"]', ['pmulti']),
+            ('[class~="class1"]', ['pmulti']),
+            ('[class~="class2"]', ['pmulti']),
+            ('[class~="class3"]', ['pmulti']),
+            ('a[rel~="friend"]', ['bob']),
+            ('a[rel~="met"]', ['bob']),
+            ('[rel~="friend"]', ['bob']),
+            ('[rel~="met"]', ['bob']),
+        )
+
+    def test_attribute_startswith(self):
+        self.assertSelectMultiple(
+            ('[rel^="style"]', ['l1']),
+            ('link[rel^="style"]', ['l1']),
+            ('notlink[rel^="notstyle"]', []),
+            ('[rel^="notstyle"]', []),
+            ('link[rel^="notstyle"]', []),
+            ('link[href^="bla"]', ['l1']),
+            ('a[href^="http://"]', ['bob', 'me']),
+            ('[href^="http://"]', ['bob', 'me']),
+            ('[id^="p"]', ['pmulti', 'p1']),
+            ('[id^="m"]', ['me', 'main']),
+            ('div[id^="m"]', ['main']),
+            ('a[id^="m"]', ['me']),
+        )
+
+    def test_attribute_endswith(self):
+        self.assertSelectMultiple(
+            ('[href$=".css"]', ['l1']),
+            ('link[href$=".css"]', ['l1']),
+            ('link[id$="1"]', ['l1']),
+            ('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1']),
+            ('div[id$="1"]', []),
+            ('[id$="noending"]', []),
+        )
+
+    def test_attribute_contains(self):
+        self.assertSelectMultiple(
+            # From test_attribute_startswith
+            ('[rel*="style"]', ['l1']),
+            ('link[rel*="style"]', ['l1']),
+            ('notlink[rel*="notstyle"]', []),
+            ('[rel*="notstyle"]', []),
+            ('link[rel*="notstyle"]', []),
+            ('link[href*="bla"]', ['l1']),
+            ('a[href*="http://"]', ['bob', 'me']),
+            ('[href*="http://"]', ['bob', 'me']),
+            ('[id*="p"]', ['pmulti', 'p1']),
+            ('div[id*="m"]', ['main']),
+            ('a[id*="m"]', ['me']),
+            # From test_attribute_endswith
+            ('[href*=".css"]', ['l1']),
+            ('link[href*=".css"]', ['l1']),
+            ('link[id*="1"]', ['l1']),
+            ('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1']),
+            ('div[id*="1"]', []),
+            ('[id*="noending"]', []),
+            # New for this test
+            ('[href*="."]', ['bob', 'me', 'l1']),
+            ('a[href*="."]', ['bob', 'me']),
+            ('link[href*="."]', ['l1']),
+            ('div[id*="n"]', ['main', 'inner']),
+            ('div[id*="nn"]', ['inner']),
+        )
+
+    def test_attribute_exact_or_hypen(self):
+        self.assertSelectMultiple(
+            ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
+            ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
+            ('p[lang|="fr"]', ['lang-fr']),
+            ('p[lang|="gb"]', []),
+        )
+
+    def test_attribute_exists(self):
+        self.assertSelectMultiple(
+            ('[rel]', ['l1', 'bob', 'me']),
+            ('link[rel]', ['l1']),
+            ('a[rel]', ['bob', 'me']),
+            ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
+            ('p[class]', ['p1', 'pmulti']),
+            ('[blah]', []),
+            ('p[blah]', []),
+        )
+
+    def test_select_on_element(self):
+        # Other tests operate on the tree; this operates on an element
+        # within the tree.
+        inner = self.soup.find("div", id="main")
+        selected = inner.select("div")
+        # The <div id="inner"> tag was selected. The <div id="footer">
+        # tag was not.
+        self.assertSelectsIDs(selected, ['inner'])

From 4f74e217a1c1f2e0452d16d71c2c26ca6e14e507 Mon Sep 17 00:00:00 2001
From: Ade <ade_hall@yahoo.co.uk>
Date: Thu, 23 Aug 2012 17:48:01 +1200
Subject: [PATCH 2/6] rutracker Various Artists fix

---
 headphones/searcher_rutracker.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/headphones/searcher_rutracker.py b/headphones/searcher_rutracker.py
index dcdd60a3..313bab4c 100644
--- a/headphones/searcher_rutracker.py
+++ b/headphones/searcher_rutracker.py
@@ -64,8 +64,10 @@ class Rutracker():
          
         # Build search url
         
-        searchterm = artist
-        searchterm = searchterm + ' '
+        searchterm = ''
+        if artist != 'Various Artists':
+            searchterm = artist
+            searchterm = searchterm + ' '
         searchterm = searchterm + album
         searchterm = searchterm + ' '
         searchterm = searchterm + year

From 7aac60cecd3a7636d388ee2687a1e75f04a8605d Mon Sep 17 00:00:00 2001
From: Ade <ade_hall@yahoo.co.uk>
Date: Fri, 24 Aug 2012 11:24:35 +1200
Subject: [PATCH 3/6] Moved BeautifulSoup bs4 directory to headphones parent
 directory

I guess the bs4 directory should be under lib, but I couldn't get the
import to work from there.
---
 {lib/bs4 => bs4}/__init__.py                    | 0
 {lib/bs4 => bs4}/builder/__init__.py            | 0
 {lib/bs4 => bs4}/builder/_html5lib.py           | 0
 {lib/bs4 => bs4}/builder/_htmlparser.py         | 0
 {lib/bs4 => bs4}/builder/_lxml.py               | 0
 {lib/bs4 => bs4}/dammit.py                      | 0
 {lib/bs4 => bs4}/element.py                     | 0
 {lib/bs4 => bs4}/testing.py                     | 0
 {lib/bs4 => bs4}/tests/__init__.py              | 0
 {lib/bs4 => bs4}/tests/test_builder_registry.py | 0
 {lib/bs4 => bs4}/tests/test_docs.py             | 0
 {lib/bs4 => bs4}/tests/test_html5lib.py         | 0
 {lib/bs4 => bs4}/tests/test_htmlparser.py       | 0
 {lib/bs4 => bs4}/tests/test_lxml.py             | 0
 {lib/bs4 => bs4}/tests/test_soup.py             | 0
 {lib/bs4 => bs4}/tests/test_tree.py             | 0
 16 files changed, 0 insertions(+), 0 deletions(-)
 rename {lib/bs4 => bs4}/__init__.py (100%)
 rename {lib/bs4 => bs4}/builder/__init__.py (100%)
 rename {lib/bs4 => bs4}/builder/_html5lib.py (100%)
 rename {lib/bs4 => bs4}/builder/_htmlparser.py (100%)
 rename {lib/bs4 => bs4}/builder/_lxml.py (100%)
 rename {lib/bs4 => bs4}/dammit.py (100%)
 rename {lib/bs4 => bs4}/element.py (100%)
 rename {lib/bs4 => bs4}/testing.py (100%)
 rename {lib/bs4 => bs4}/tests/__init__.py (100%)
 rename {lib/bs4 => bs4}/tests/test_builder_registry.py (100%)
 rename {lib/bs4 => bs4}/tests/test_docs.py (100%)
 rename {lib/bs4 => bs4}/tests/test_html5lib.py (100%)
 rename {lib/bs4 => bs4}/tests/test_htmlparser.py (100%)
 rename {lib/bs4 => bs4}/tests/test_lxml.py (100%)
 rename {lib/bs4 => bs4}/tests/test_soup.py (100%)
 rename {lib/bs4 => bs4}/tests/test_tree.py (100%)

diff --git a/lib/bs4/__init__.py b/bs4/__init__.py
similarity index 100%
rename from lib/bs4/__init__.py
rename to bs4/__init__.py
diff --git a/lib/bs4/builder/__init__.py b/bs4/builder/__init__.py
similarity index 100%
rename from lib/bs4/builder/__init__.py
rename to bs4/builder/__init__.py
diff --git a/lib/bs4/builder/_html5lib.py b/bs4/builder/_html5lib.py
similarity index 100%
rename from lib/bs4/builder/_html5lib.py
rename to bs4/builder/_html5lib.py
diff --git a/lib/bs4/builder/_htmlparser.py b/bs4/builder/_htmlparser.py
similarity index 100%
rename from lib/bs4/builder/_htmlparser.py
rename to bs4/builder/_htmlparser.py
diff --git a/lib/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
similarity index 100%
rename from lib/bs4/builder/_lxml.py
rename to bs4/builder/_lxml.py
diff --git a/lib/bs4/dammit.py b/bs4/dammit.py
similarity index 100%
rename from lib/bs4/dammit.py
rename to bs4/dammit.py
diff --git a/lib/bs4/element.py b/bs4/element.py
similarity index 100%
rename from lib/bs4/element.py
rename to bs4/element.py
diff --git a/lib/bs4/testing.py b/bs4/testing.py
similarity index 100%
rename from lib/bs4/testing.py
rename to bs4/testing.py
diff --git a/lib/bs4/tests/__init__.py b/bs4/tests/__init__.py
similarity index 100%
rename from lib/bs4/tests/__init__.py
rename to bs4/tests/__init__.py
diff --git a/lib/bs4/tests/test_builder_registry.py b/bs4/tests/test_builder_registry.py
similarity index 100%
rename from lib/bs4/tests/test_builder_registry.py
rename to bs4/tests/test_builder_registry.py
diff --git a/lib/bs4/tests/test_docs.py b/bs4/tests/test_docs.py
similarity index 100%
rename from lib/bs4/tests/test_docs.py
rename to bs4/tests/test_docs.py
diff --git a/lib/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
similarity index 100%
rename from lib/bs4/tests/test_html5lib.py
rename to bs4/tests/test_html5lib.py
diff --git a/lib/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
similarity index 100%
rename from lib/bs4/tests/test_htmlparser.py
rename to bs4/tests/test_htmlparser.py
diff --git a/lib/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
similarity index 100%
rename from lib/bs4/tests/test_lxml.py
rename to bs4/tests/test_lxml.py
diff --git a/lib/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
similarity index 100%
rename from lib/bs4/tests/test_soup.py
rename to bs4/tests/test_soup.py
diff --git a/lib/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
similarity index 100%
rename from lib/bs4/tests/test_tree.py
rename to bs4/tests/test_tree.py

From b8c57827655a9e679c70ba6283f38de801b2cb37 Mon Sep 17 00:00:00 2001
From: Ade <ade_hall@yahoo.co.uk>
Date: Thu, 6 Sep 2012 10:47:07 +1200
Subject: [PATCH 4/6] Include html5lib for BeautifulSoup

BeautifulSoup needs lxml or html5lib; html5lib is now included.

Also updated to the latest BeautifulSoup, 4.1.3.
---
 bs4/__init__.py                               |    14 +-
 bs4/builder/__init__.py                       |    11 +-
 bs4/builder/_lxml.py                          |    30 +-
 bs4/dammit.py                                 |    39 +-
 bs4/element.py                                |    22 +-
 bs4/testing.py                                |    22 +
 bs4/tests/__init__.py                         |     1 -
 bs4/tests/test_builder_registry.py            |   141 -
 bs4/tests/test_docs.py                        |    36 -
 bs4/tests/test_html5lib.py                    |    58 -
 bs4/tests/test_htmlparser.py                  |    19 -
 bs4/tests/test_lxml.py                        |    75 -
 bs4/tests/test_soup.py                        |   368 -
 bs4/tests/test_tree.py                        |  1695 -
 data/interfaces/default/base.html             |     2 +-
 data/interfaces/default/config.html           |     2 +-
 headphones/versioncheck.py                    |     2 +-
 html5lib/__init__.py                          |    17 +
 html5lib/constants.py                         |  3085 ++
 html5lib/filters/__init__.py                  |     0
 html5lib/filters/_base.py                     |    10 +
 html5lib/filters/formfiller.py                |   127 +
 html5lib/filters/inject_meta_charset.py       |    62 +
 html5lib/filters/lint.py                      |    88 +
 html5lib/filters/optionaltags.py              |   202 +
 html5lib/filters/sanitizer.py                 |     8 +
 html5lib/filters/whitespace.py                |    41 +
 html5lib/html5parser.py                       |  2733 +
 html5lib/ihatexml.py                          |   177 +
 html5lib/inputstream.py                       |   782 +
 html5lib/sanitizer.py                         |   258 +
 html5lib/serializer/__init__.py               |    17 +
 html5lib/serializer/htmlserializer.py         |   312 +
 html5lib/serializer/xhtmlserializer.py        |     9 +
 html5lib/tests/__init__.py                    |    12 +
 html5lib/tests/mockParser.py                  |    37 +
 html5lib/tests/runparsertests.py              |    27 +
 html5lib/tests/runtests.py                    |    20 +
 html5lib/tests/support.py                     |   127 +
 html5lib/tests/test_encoding.py               |    54 +
 html5lib/tests/test_formfiller.py             |   296 +
 html5lib/tests/test_parser.py                 |   140 +
 html5lib/tests/test_parser2.py                |    39 +
 html5lib/tests/test_sanitizer.py              |    76 +
 html5lib/tests/test_serializer.py             |   180 +
 html5lib/tests/test_stream.py                 |    97 +
 html5lib/tests/test_tokenizer.py              |   193 +
 html5lib/tests/test_treewalkers.py            |   311 +
 html5lib/tests/test_whitespace_filter.py      |   123 +
 .../tests/testdata/encoding/test-yahoo-jp.dat |    10 +
 html5lib/tests/testdata/encoding/tests1.dat   |   394 +
 html5lib/tests/testdata/encoding/tests2.dat   |   115 +
 html5lib/tests/testdata/sanitizer/tests1.dat  |   501 +
 html5lib/tests/testdata/serializer/core.test  |   125 +
 .../tests/testdata/serializer/injectmeta.test |    66 +
 .../testdata/serializer/optionaltags.test     |   965 +
 .../tests/testdata/serializer/options.test    |    60 +
 .../tests/testdata/serializer/whitespace.test |    51 +
 .../tests/testdata/sniffer/htmlOrFeed.json    |    43 +
 .../testdata/tokenizer/contentModelFlags.test |    75 +
 html5lib/tests/testdata/tokenizer/domjs.test  |    90 +
 .../tests/testdata/tokenizer/entities.test    |   283 +
 .../tests/testdata/tokenizer/escapeFlag.test  |    33 +
 .../testdata/tokenizer/namedEntities.test     | 44189 ++++++++++++++++
 .../testdata/tokenizer/numericEntities.test   |  1313 +
 .../tokenizer/pendingSpecChanges.test         |     7 +
 html5lib/tests/testdata/tokenizer/test1.test  |   196 +
 html5lib/tests/testdata/tokenizer/test2.test  |   179 +
 html5lib/tests/testdata/tokenizer/test3.test  |  6047 +++
 html5lib/tests/testdata/tokenizer/test4.test  |   344 +
 .../testdata/tokenizer/unicodeChars.test      |  1295 +
 .../tokenizer/unicodeCharsProblematic.test    |    27 +
 .../testdata/tokenizer/xmlViolation.test      |    22 +
 .../testdata/tree-construction/adoption01.dat |   194 +
 .../testdata/tree-construction/adoption02.dat |    31 +
 .../testdata/tree-construction/comments01.dat |   135 +
 .../testdata/tree-construction/doctype01.dat  |   370 +
 .../tree-construction/domjs-unsafe.dat        |   Bin 0 -> 6639 bytes
 .../testdata/tree-construction/entities01.dat |   603 +
 .../testdata/tree-construction/entities02.dat |   249 +
 .../tree-construction/html5test-com.dat       |   246 +
 .../testdata/tree-construction/inbody01.dat   |    43 +
 .../testdata/tree-construction/isindex.dat    |    40 +
 ...pending-spec-changes-plain-text-unsafe.dat |   Bin 0 -> 115 bytes
 .../pending-spec-changes.dat                  |    52 +
 .../tree-construction/plain-text-unsafe.dat   |   Bin 0 -> 4166 bytes
 .../tree-construction/scriptdata01.dat        |   308 +
 .../testdata/tree-construction/tables01.dat   |   212 +
 .../testdata/tree-construction/tests1.dat     |  1952 +
 .../testdata/tree-construction/tests10.dat    |   799 +
 .../testdata/tree-construction/tests11.dat    |   482 +
 .../testdata/tree-construction/tests12.dat    |    62 +
 .../testdata/tree-construction/tests14.dat    |    74 +
 .../testdata/tree-construction/tests15.dat    |   208 +
 .../testdata/tree-construction/tests16.dat    |  2299 +
 .../testdata/tree-construction/tests17.dat    |   153 +
 .../testdata/tree-construction/tests18.dat    |   269 +
 .../testdata/tree-construction/tests19.dat    |  1237 +
 .../testdata/tree-construction/tests2.dat     |   763 +
 .../testdata/tree-construction/tests20.dat    |   455 +
 .../testdata/tree-construction/tests21.dat    |   221 +
 .../testdata/tree-construction/tests22.dat    |   157 +
 .../testdata/tree-construction/tests23.dat    |   155 +
 .../testdata/tree-construction/tests24.dat    |    79 +
 .../testdata/tree-construction/tests25.dat    |   219 +
 .../testdata/tree-construction/tests26.dat    |   313 +
 .../testdata/tree-construction/tests3.dat     |   305 +
 .../testdata/tree-construction/tests4.dat     |    59 +
 .../testdata/tree-construction/tests5.dat     |   191 +
 .../testdata/tree-construction/tests6.dat     |   663 +
 .../testdata/tree-construction/tests7.dat     |   390 +
 .../testdata/tree-construction/tests8.dat     |   148 +
 .../testdata/tree-construction/tests9.dat     |   457 +
 .../tree-construction/tests_innerHTML_1.dat   |   741 +
 .../testdata/tree-construction/tricky01.dat   |   261 +
 .../testdata/tree-construction/webkit01.dat   |   594 +
 .../testdata/tree-construction/webkit02.dat   |    94 +
 html5lib/tests/tokenizertotree.py             |    64 +
 html5lib/tokenizer.py                         |  1744 +
 html5lib/treebuilders/__init__.py             |    96 +
 html5lib/treebuilders/_base.py                |   377 +
 html5lib/treebuilders/dom.py                  |   291 +
 html5lib/treebuilders/etree.py                |   344 +
 html5lib/treebuilders/etree_lxml.py           |   336 +
 html5lib/treebuilders/simpletree.py           |   256 +
 html5lib/treebuilders/soup.py                 |   236 +
 html5lib/treewalkers/__init__.py              |    52 +
 html5lib/treewalkers/_base.py                 |   176 +
 html5lib/treewalkers/dom.py                   |    41 +
 html5lib/treewalkers/etree.py                 |   141 +
 html5lib/treewalkers/genshistream.py          |    70 +
 html5lib/treewalkers/lxmletree.py             |   186 +
 html5lib/treewalkers/pulldom.py               |    60 +
 html5lib/treewalkers/simpletree.py            |    78 +
 html5lib/treewalkers/soup.py                  |    60 +
 html5lib/utils.py                             |   175 +
 136 files changed, 87265 insertions(+), 2428 deletions(-)
 delete mode 100644 bs4/tests/__init__.py
 delete mode 100644 bs4/tests/test_builder_registry.py
 delete mode 100644 bs4/tests/test_docs.py
 delete mode 100644 bs4/tests/test_html5lib.py
 delete mode 100644 bs4/tests/test_htmlparser.py
 delete mode 100644 bs4/tests/test_lxml.py
 delete mode 100644 bs4/tests/test_soup.py
 delete mode 100644 bs4/tests/test_tree.py
 create mode 100644 html5lib/__init__.py
 create mode 100644 html5lib/constants.py
 create mode 100644 html5lib/filters/__init__.py
 create mode 100644 html5lib/filters/_base.py
 create mode 100644 html5lib/filters/formfiller.py
 create mode 100644 html5lib/filters/inject_meta_charset.py
 create mode 100644 html5lib/filters/lint.py
 create mode 100644 html5lib/filters/optionaltags.py
 create mode 100644 html5lib/filters/sanitizer.py
 create mode 100644 html5lib/filters/whitespace.py
 create mode 100644 html5lib/html5parser.py
 create mode 100644 html5lib/ihatexml.py
 create mode 100644 html5lib/inputstream.py
 create mode 100644 html5lib/sanitizer.py
 create mode 100644 html5lib/serializer/__init__.py
 create mode 100644 html5lib/serializer/htmlserializer.py
 create mode 100644 html5lib/serializer/xhtmlserializer.py
 create mode 100644 html5lib/tests/__init__.py
 create mode 100644 html5lib/tests/mockParser.py
 create mode 100644 html5lib/tests/runparsertests.py
 create mode 100644 html5lib/tests/runtests.py
 create mode 100644 html5lib/tests/support.py
 create mode 100644 html5lib/tests/test_encoding.py
 create mode 100644 html5lib/tests/test_formfiller.py
 create mode 100644 html5lib/tests/test_parser.py
 create mode 100755 html5lib/tests/test_parser2.py
 create mode 100644 html5lib/tests/test_sanitizer.py
 create mode 100644 html5lib/tests/test_serializer.py
 create mode 100755 html5lib/tests/test_stream.py
 create mode 100644 html5lib/tests/test_tokenizer.py
 create mode 100644 html5lib/tests/test_treewalkers.py
 create mode 100644 html5lib/tests/test_whitespace_filter.py
 create mode 100644 html5lib/tests/testdata/encoding/test-yahoo-jp.dat
 create mode 100644 html5lib/tests/testdata/encoding/tests1.dat
 create mode 100644 html5lib/tests/testdata/encoding/tests2.dat
 create mode 100644 html5lib/tests/testdata/sanitizer/tests1.dat
 create mode 100644 html5lib/tests/testdata/serializer/core.test
 create mode 100644 html5lib/tests/testdata/serializer/injectmeta.test
 create mode 100644 html5lib/tests/testdata/serializer/optionaltags.test
 create mode 100644 html5lib/tests/testdata/serializer/options.test
 create mode 100644 html5lib/tests/testdata/serializer/whitespace.test
 create mode 100644 html5lib/tests/testdata/sniffer/htmlOrFeed.json
 create mode 100644 html5lib/tests/testdata/tokenizer/contentModelFlags.test
 create mode 100644 html5lib/tests/testdata/tokenizer/domjs.test
 create mode 100644 html5lib/tests/testdata/tokenizer/entities.test
 create mode 100644 html5lib/tests/testdata/tokenizer/escapeFlag.test
 create mode 100644 html5lib/tests/testdata/tokenizer/namedEntities.test
 create mode 100644 html5lib/tests/testdata/tokenizer/numericEntities.test
 create mode 100644 html5lib/tests/testdata/tokenizer/pendingSpecChanges.test
 create mode 100644 html5lib/tests/testdata/tokenizer/test1.test
 create mode 100644 html5lib/tests/testdata/tokenizer/test2.test
 create mode 100644 html5lib/tests/testdata/tokenizer/test3.test
 create mode 100644 html5lib/tests/testdata/tokenizer/test4.test
 create mode 100644 html5lib/tests/testdata/tokenizer/unicodeChars.test
 create mode 100644 html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test
 create mode 100644 html5lib/tests/testdata/tokenizer/xmlViolation.test
 create mode 100644 html5lib/tests/testdata/tree-construction/adoption01.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/adoption02.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/comments01.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/doctype01.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/domjs-unsafe.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/entities01.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/entities02.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/html5test-com.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/inbody01.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/isindex.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/pending-spec-changes-plain-text-unsafe.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/pending-spec-changes.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/plain-text-unsafe.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/scriptdata01.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tables01.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests1.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests10.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests11.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests12.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests14.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests15.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests16.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests17.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests18.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests19.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests2.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests20.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests21.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests22.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests23.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests24.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests25.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests26.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests3.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests4.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests5.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests6.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests7.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests8.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests9.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tests_innerHTML_1.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/tricky01.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/webkit01.dat
 create mode 100644 html5lib/tests/testdata/tree-construction/webkit02.dat
 create mode 100644 html5lib/tests/tokenizertotree.py
 create mode 100644 html5lib/tokenizer.py
 create mode 100755 html5lib/treebuilders/__init__.py
 create mode 100755 html5lib/treebuilders/_base.py
 create mode 100644 html5lib/treebuilders/dom.py
 create mode 100755 html5lib/treebuilders/etree.py
 create mode 100644 html5lib/treebuilders/etree_lxml.py
 create mode 100755 html5lib/treebuilders/simpletree.py
 create mode 100644 html5lib/treebuilders/soup.py
 create mode 100644 html5lib/treewalkers/__init__.py
 create mode 100644 html5lib/treewalkers/_base.py
 create mode 100644 html5lib/treewalkers/dom.py
 create mode 100644 html5lib/treewalkers/etree.py
 create mode 100644 html5lib/treewalkers/genshistream.py
 create mode 100644 html5lib/treewalkers/lxmletree.py
 create mode 100644 html5lib/treewalkers/pulldom.py
 create mode 100644 html5lib/treewalkers/simpletree.py
 create mode 100644 html5lib/treewalkers/soup.py
 create mode 100644 html5lib/utils.py

diff --git a/bs4/__init__.py b/bs4/__init__.py
index af8c718d..80f6f684 100644
--- a/bs4/__init__.py
+++ b/bs4/__init__.py
@@ -17,7 +17,7 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.1.0"
+__version__ = "4.1.3"
 __copyright__ = "Copyright (c) 2004-2012 Leonard Richardson"
 __license__ = "MIT"
 
@@ -149,7 +149,7 @@ class BeautifulSoup(Tag):
                 features = self.DEFAULT_BUILDER_FEATURES
             builder_class = builder_registry.lookup(*features)
             if builder_class is None:
-                raise ValueError(
+                raise FeatureNotFound(
                     "Couldn't find a tree builder with the features you "
                     "requested: %s. Do you need to install a parser library?"
                     % ",".join(features))
@@ -208,10 +208,10 @@ class BeautifulSoup(Tag):
         return navigable
 
     def insert_before(self, successor):
-        raise ValueError("BeautifulSoup objects don't support insert_before().")
+        raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
 
     def insert_after(self, successor):
-        raise ValueError("BeautifulSoup objects don't support insert_after().")
+        raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
 
     def popTag(self):
         tag = self.tagStack.pop()
@@ -267,7 +267,7 @@ class BeautifulSoup(Tag):
 
         for i in range(len(self.tagStack) - 1, 0, -1):
             if (name == self.tagStack[i].name
-                and nsprefix == self.tagStack[i].nsprefix == nsprefix):
+                and nsprefix == self.tagStack[i].prefix):
                 numPops = len(self.tagStack) - i
                 break
         if not inclusivePop:
@@ -348,6 +348,10 @@ class StopParsing(Exception):
     pass
 
 
+class FeatureNotFound(ValueError):
+    pass
+
+
 #By default, act as an HTML pretty-printer.
 if __name__ == '__main__':
     import sys
diff --git a/bs4/builder/__init__.py b/bs4/builder/__init__.py
index 4c22b864..dc7deb93 100644
--- a/bs4/builder/__init__.py
+++ b/bs4/builder/__init__.py
@@ -157,7 +157,16 @@ class TreeBuilder(object):
                     # value is a whitespace-separated list of CSS
                     # classes. Split it into a list.
                     value = attrs[cdata_list_attr]
-                    values = whitespace_re.split(value)
+                    if isinstance(value, basestring):
+                        values = whitespace_re.split(value)
+                    else:
+                        # html5lib sometimes calls setAttributes twice
+                        # for the same tag when rearranging the parse
+                        # tree. On the second call the attribute value
+                        # here is already a list.  If this happens,
+                        # leave the value alone rather than trying to
+                        # split it again.
+                        values = value
                     attrs[cdata_list_attr] = values
         return attrs
 
diff --git a/bs4/builder/_lxml.py b/bs4/builder/_lxml.py
index c78fdff6..f6b91ff5 100644
--- a/bs4/builder/_lxml.py
+++ b/bs4/builder/_lxml.py
@@ -111,14 +111,34 @@ class LXMLTreeBuilderForXML(TreeBuilder):
                 attribute = NamespacedAttribute(
                     "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
                 attrs[attribute] = namespace
+
+        if self.nsmaps is not None and len(self.nsmaps) > 0:
+            # Namespaces are in play. Find any attributes that came in
+            # from lxml with namespaces attached to their names, and
+            # turn them into NamespacedAttribute objects.
+            new_attrs = {}
+            for attr, value in attrs.items():
+                namespace, attr = self._getNsTag(attr)
+                if namespace is None:
+                    new_attrs[attr] = value
+                else:
+                    nsprefix = self._prefix_for_namespace(namespace)
+                    attr = NamespacedAttribute(nsprefix, attr, namespace)
+                    new_attrs[attr] = value
+            attrs = new_attrs
+
         namespace, name = self._getNsTag(name)
-        if namespace is not None:
-            for inverted_nsmap in reversed(self.nsmaps):
-                if inverted_nsmap is not None and namespace in inverted_nsmap:
-                    nsprefix = inverted_nsmap[namespace]
-                    break
+        nsprefix = self._prefix_for_namespace(namespace)
         self.soup.handle_starttag(name, namespace, nsprefix, attrs)
 
+    def _prefix_for_namespace(self, namespace):
+        """Find the currently active prefix for the given namespace."""
+        if namespace is None:
+            return None
+        for inverted_nsmap in reversed(self.nsmaps):
+            if inverted_nsmap is not None and namespace in inverted_nsmap:
+                return inverted_nsmap[namespace]
+
     def end(self, name):
         self.soup.endData()
         completed_tag = self.soup.tagStack[-1]
diff --git a/bs4/dammit.py b/bs4/dammit.py
index 58cad9ba..983ade0f 100644
--- a/bs4/dammit.py
+++ b/bs4/dammit.py
@@ -10,18 +10,30 @@ encoding; that's the tree builder's job.
 import codecs
 from htmlentitydefs import codepoint2name
 import re
-import warnings
+import logging
 
-# Autodetects character encodings. Very useful.
-# Download from http://chardet.feedparser.org/
-#  or 'apt-get install python-chardet'
-#  or 'easy_install chardet'
+# Import a library to autodetect character encodings.
+chardet_type = None
 try:
-    import chardet
-    #import chardet.constants
-    #chardet.constants._debug = 1
+    # First try the fast C implementation.
+    #  PyPI package: cchardet
+    import cchardet
+    def chardet_dammit(s):
+        return cchardet.detect(s)['encoding']
 except ImportError:
-    chardet = None
+    try:
+        # Fall back to the pure Python implementation
+        #  Debian package: python-chardet
+        #  PyPI package: chardet
+        import chardet
+        def chardet_dammit(s):
+            return chardet.detect(s)['encoding']
+        #import chardet.constants
+        #chardet.constants._debug = 1
+    except ImportError:
+        # No chardet available.
+        def chardet_dammit(s):
+            return None
 
 # Available from http://cjkpython.i18n.org/.
 try:
@@ -207,8 +219,8 @@ class UnicodeDammit:
                         break
 
         # If no luck and we have auto-detection library, try that:
-        if not u and chardet and not isinstance(self.markup, unicode):
-            u = self._convert_from(chardet.detect(self.markup)['encoding'])
+        if not u and not isinstance(self.markup, unicode):
+            u = self._convert_from(chardet_dammit(self.markup))
 
         # As a last resort, try utf-8 and windows-1252:
         if not u:
@@ -226,10 +238,9 @@ class UnicodeDammit:
                 if proposed_encoding != "ascii":
                     u = self._convert_from(proposed_encoding, "replace")
                 if u is not None:
-                    warnings.warn(
-                        UnicodeWarning(
+                    logging.warning(
                             "Some characters could not be decoded, and were "
-                            "replaced with REPLACEMENT CHARACTER."))
+                            "replaced with REPLACEMENT CHARACTER.")
                     self.contains_replacement_characters = True
                     break
 
diff --git a/bs4/element.py b/bs4/element.py
index 91a40078..26422fda 100644
--- a/bs4/element.py
+++ b/bs4/element.py
@@ -820,7 +820,7 @@ class Tag(PageElement):
         for string in self._all_strings(True):
             yield string
 
-    def get_text(self, separator="", strip=False):
+    def get_text(self, separator=u"", strip=False):
         """
         Get all child strings, concatenated using the given separator.
         """
@@ -987,7 +987,7 @@ class Tag(PageElement):
                     if isinstance(val, list) or isinstance(val, tuple):
                         val = ' '.join(val)
                     elif not isinstance(val, basestring):
-                        val = str(val)
+                        val = unicode(val)
                     elif (
                         isinstance(val, AttributeValueWithCharsetSubstitution)
                         and eventual_encoding is not None):
@@ -995,20 +995,21 @@ class Tag(PageElement):
 
                     text = self.format_string(val, formatter)
                     decoded = (
-                        str(key) + '='
+                        unicode(key) + '='
                         + EntitySubstitution.quoted_attribute_value(text))
                 attrs.append(decoded)
         close = ''
         closeTag = ''
-        if self.is_empty_element:
-            close = '/'
-        else:
-            closeTag = '</%s>' % self.name
 
         prefix = ''
         if self.prefix:
             prefix = self.prefix + ":"
 
+        if self.is_empty_element:
+            close = '/'
+        else:
+            closeTag = '</%s%s>' % (prefix, self.name)
+
         pretty_print = (indent_level is not None)
         if pretty_print:
             space = (' ' * (indent_level - 1))
@@ -1120,6 +1121,7 @@ class Tag(PageElement):
         callable that takes a string and returns whether or not the
         string matches for some custom definition of 'matches'. The
         same is true of the tag name."""
+
         generator = self.descendants
         if not recursive:
             generator = self.children
@@ -1168,6 +1170,12 @@ class SoupStrainer(object):
             kwargs['class'] = attrs
             attrs = None
 
+        if 'class_' in kwargs:
+            # Treat class_="foo" as a search for the 'class'
+            # attribute, overriding any non-dict value for attrs.
+            kwargs['class'] = kwargs['class_']
+            del kwargs['class_']
+
         if kwargs:
             if attrs:
                 attrs = attrs.copy()
diff --git a/bs4/testing.py b/bs4/testing.py
index 5a84b0ba..30e74f42 100644
--- a/bs4/testing.py
+++ b/bs4/testing.py
@@ -202,6 +202,14 @@ class HTMLTreeBuilderSmokeTest(object):
             "<tbody><tr><td>Bar</td></tr></tbody>"
             "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
 
+    def test_deeply_nested_multivalued_attribute(self):
+        # html5lib can set the attributes of the same tag many times
+        # as it rearranges the tree. This has caused problems with
+        # multivalued attributes.
+        markup = '<table><div><div class="css"></div></div></table>'
+        soup = self.soup(markup)
+        self.assertEqual(["css"], soup.div.div['class'])
+
     def test_angle_brackets_in_attribute_values_are_escaped(self):
         self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
 
@@ -445,6 +453,11 @@ class XMLTreeBuilderSmokeTest(object):
         self.assertEqual(
             soup.encode("utf-8"), markup)
 
+    def test_popping_namespaced_tag(self):
+        markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
+        soup = self.soup(markup)
+        self.assertEqual(
+            unicode(soup.rss), markup)
 
     def test_docstring_includes_correct_encoding(self):
         soup = self.soup("<root/>")
@@ -472,6 +485,15 @@ class XMLTreeBuilderSmokeTest(object):
         self.assertEqual("http://example.com/", root['xmlns:a'])
         self.assertEqual("http://example.net/", root['xmlns:b'])
 
+    def test_closing_namespaced_tag(self):
+        markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
+        soup = self.soup(markup)
+        self.assertEqual(unicode(soup.p), markup)
+
+    def test_namespaced_attributes(self):
+        markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
+        soup = self.soup(markup)
+        self.assertEqual(unicode(soup.foo), markup)
 
 class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
     """Smoke test for a tree builder that supports HTML5."""
diff --git a/bs4/tests/__init__.py b/bs4/tests/__init__.py
deleted file mode 100644
index 142c8cc3..00000000
--- a/bs4/tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-"The beautifulsoup tests."
diff --git a/bs4/tests/test_builder_registry.py b/bs4/tests/test_builder_registry.py
deleted file mode 100644
index 92ad10fb..00000000
--- a/bs4/tests/test_builder_registry.py
+++ /dev/null
@@ -1,141 +0,0 @@
-"""Tests of the builder registry."""
-
-import unittest
-
-from bs4 import BeautifulSoup
-from bs4.builder import (
-    builder_registry as registry,
-    HTMLParserTreeBuilder,
-    TreeBuilderRegistry,
-)
-
-try:
-    from bs4.builder import HTML5TreeBuilder
-    HTML5LIB_PRESENT = True
-except ImportError:
-    HTML5LIB_PRESENT = False
-
-try:
-    from bs4.builder import (
-        LXMLTreeBuilderForXML,
-        LXMLTreeBuilder,
-        )
-    LXML_PRESENT = True
-except ImportError:
-    LXML_PRESENT = False
-
-
-class BuiltInRegistryTest(unittest.TestCase):
-    """Test the built-in registry with the default builders registered."""
-
-    def test_combination(self):
-        if LXML_PRESENT:
-            self.assertEqual(registry.lookup('fast', 'html'),
-                             LXMLTreeBuilder)
-
-        if LXML_PRESENT:
-            self.assertEqual(registry.lookup('permissive', 'xml'),
-                             LXMLTreeBuilderForXML)
-        self.assertEqual(registry.lookup('strict', 'html'),
-                          HTMLParserTreeBuilder)
-        if HTML5LIB_PRESENT:
-            self.assertEqual(registry.lookup('html5lib', 'html'),
-                              HTML5TreeBuilder)
-
-    def test_lookup_by_markup_type(self):
-        if LXML_PRESENT:
-            self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
-            self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
-        else:
-            self.assertEqual(registry.lookup('xml'), None)
-            if HTML5LIB_PRESENT:
-                self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
-            else:
-                self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
-
-    def test_named_library(self):
-        if LXML_PRESENT:
-            self.assertEqual(registry.lookup('lxml', 'xml'),
-                             LXMLTreeBuilderForXML)
-            self.assertEqual(registry.lookup('lxml', 'html'),
-                             LXMLTreeBuilder)
-        if HTML5LIB_PRESENT:
-            self.assertEqual(registry.lookup('html5lib'),
-                              HTML5TreeBuilder)
-
-        self.assertEqual(registry.lookup('html.parser'),
-                          HTMLParserTreeBuilder)
-
-    def test_beautifulsoup_constructor_does_lookup(self):
-        # You can pass in a string.
-        BeautifulSoup("", features="html")
-        # Or a list of strings.
-        BeautifulSoup("", features=["html", "fast"])
-
-        # You'll get an exception if BS can't find an appropriate
-        # builder.
-        self.assertRaises(ValueError, BeautifulSoup,
-                          "", features="no-such-feature")
-
-class RegistryTest(unittest.TestCase):
-    """Test the TreeBuilderRegistry class in general."""
-
-    def setUp(self):
-        self.registry = TreeBuilderRegistry()
-
-    def builder_for_features(self, *feature_list):
-        cls = type('Builder_' + '_'.join(feature_list),
-                   (object,), {'features' : feature_list})
-
-        self.registry.register(cls)
-        return cls
-
-    def test_register_with_no_features(self):
-        builder = self.builder_for_features()
-
-        # Since the builder advertises no features, you can't find it
-        # by looking up features.
-        self.assertEqual(self.registry.lookup('foo'), None)
-
-        # But you can find it by doing a lookup with no features, if
-        # this happens to be the only registered builder.
-        self.assertEqual(self.registry.lookup(), builder)
-
-    def test_register_with_features_makes_lookup_succeed(self):
-        builder = self.builder_for_features('foo', 'bar')
-        self.assertEqual(self.registry.lookup('foo'), builder)
-        self.assertEqual(self.registry.lookup('bar'), builder)
-
-    def test_lookup_fails_when_no_builder_implements_feature(self):
-        builder = self.builder_for_features('foo', 'bar')
-        self.assertEqual(self.registry.lookup('baz'), None)
-
-    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
-        builder1 = self.builder_for_features('foo')
-        builder2 = self.builder_for_features('bar')
-        self.assertEqual(self.registry.lookup(), builder2)
-
-    def test_lookup_fails_when_no_tree_builders_registered(self):
-        self.assertEqual(self.registry.lookup(), None)
-
-    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
-        has_one = self.builder_for_features('foo')
-        has_the_other = self.builder_for_features('bar')
-        has_both_early = self.builder_for_features('foo', 'bar', 'baz')
-        has_both_late = self.builder_for_features('foo', 'bar', 'quux')
-        lacks_one = self.builder_for_features('bar')
-        has_the_other = self.builder_for_features('foo')
-
-        # There are two builders featuring 'foo' and 'bar', but
-        # the one that also features 'quux' was registered later.
-        self.assertEqual(self.registry.lookup('foo', 'bar'),
-                          has_both_late)
-
-        # There is only one builder featuring 'foo', 'bar', and 'baz'.
-        self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
-                          has_both_early)
-
-    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
-        builder1 = self.builder_for_features('foo', 'bar')
-        builder2 = self.builder_for_features('foo', 'baz')
-        self.assertEqual(self.registry.lookup('bar', 'baz'), None)
diff --git a/bs4/tests/test_docs.py b/bs4/tests/test_docs.py
deleted file mode 100644
index 5b9f6770..00000000
--- a/bs4/tests/test_docs.py
+++ /dev/null
@@ -1,36 +0,0 @@
-"Test harness for doctests."
-
-# pylint: disable-msg=E0611,W0142
-
-__metaclass__ = type
-__all__ = [
-    'additional_tests',
-    ]
-
-import atexit
-import doctest
-import os
-#from pkg_resources import (
-#    resource_filename, resource_exists, resource_listdir, cleanup_resources)
-import unittest
-
-DOCTEST_FLAGS = (
-    doctest.ELLIPSIS |
-    doctest.NORMALIZE_WHITESPACE |
-    doctest.REPORT_NDIFF)
-
-
-# def additional_tests():
-#     "Run the doc tests (README.txt and docs/*, if any exist)"
-#     doctest_files = [
-#         os.path.abspath(resource_filename('bs4', 'README.txt'))]
-#     if resource_exists('bs4', 'docs'):
-#         for name in resource_listdir('bs4', 'docs'):
-#             if name.endswith('.txt'):
-#                 doctest_files.append(
-#                     os.path.abspath(
-#                         resource_filename('bs4', 'docs/%s' % name)))
-#     kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
-#     atexit.register(cleanup_resources)
-#     return unittest.TestSuite((
-#         doctest.DocFileSuite(*doctest_files, **kwargs)))
diff --git a/bs4/tests/test_html5lib.py b/bs4/tests/test_html5lib.py
deleted file mode 100644
index f195f7d0..00000000
--- a/bs4/tests/test_html5lib.py
+++ /dev/null
@@ -1,58 +0,0 @@
-"""Tests to ensure that the html5lib tree builder generates good trees."""
-
-import warnings
-
-try:
-    from bs4.builder import HTML5TreeBuilder
-    HTML5LIB_PRESENT = True
-except ImportError, e:
-    HTML5LIB_PRESENT = False
-from bs4.element import SoupStrainer
-from bs4.testing import (
-    HTML5TreeBuilderSmokeTest,
-    SoupTest,
-    skipIf,
-)
-
-@skipIf(
-    not HTML5LIB_PRESENT,
-    "html5lib seems not to be present, not testing its tree builder.")
-class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
-    """See ``HTML5TreeBuilderSmokeTest``."""
-
-    @property
-    def default_builder(self):
-        return HTML5TreeBuilder()
-
-    def test_soupstrainer(self):
-        # The html5lib tree builder does not support SoupStrainers.
-        strainer = SoupStrainer("b")
-        markup = "<p>A <b>bold</b> statement.</p>"
-        with warnings.catch_warnings(record=True) as w:
-            soup = self.soup(markup, parse_only=strainer)
-        self.assertEqual(
-            soup.decode(), self.document_for(markup))
-
-        self.assertTrue(
-            "the html5lib tree builder doesn't support parse_only" in
-            str(w[0].message))
-
-    def test_correctly_nested_tables(self):
-        """html5lib inserts <tbody> tags where other parsers don't."""
-        markup = ('<table id="1">'
-                  '<tr>'
-                  "<td>Here's another table:"
-                  '<table id="2">'
-                  '<tr><td>foo</td></tr>'
-                  '</table></td>')
-
-        self.assertSoupEquals(
-            markup,
-            '<table id="1"><tbody><tr><td>Here\'s another table:'
-            '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
-            '</td></tr></tbody></table>')
-
-        self.assertSoupEquals(
-            "<table><thead><tr><td>Foo</td></tr></thead>"
-            "<tbody><tr><td>Bar</td></tr></tbody>"
-            "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
diff --git a/bs4/tests/test_htmlparser.py b/bs4/tests/test_htmlparser.py
deleted file mode 100644
index bcb5ed23..00000000
--- a/bs4/tests/test_htmlparser.py
+++ /dev/null
@@ -1,19 +0,0 @@
-"""Tests to ensure that the html.parser tree builder generates good
-trees."""
-
-from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
-from bs4.builder import HTMLParserTreeBuilder
-
-class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
-
-    @property
-    def default_builder(self):
-        return HTMLParserTreeBuilder()
-
-    def test_namespaced_system_doctype(self):
-        # html.parser can't handle namespaced doctypes, so skip this one.
-        pass
-
-    def test_namespaced_public_doctype(self):
-        # html.parser can't handle namespaced doctypes, so skip this one.
-        pass
diff --git a/bs4/tests/test_lxml.py b/bs4/tests/test_lxml.py
deleted file mode 100644
index 39e26bfb..00000000
--- a/bs4/tests/test_lxml.py
+++ /dev/null
@@ -1,75 +0,0 @@
-"""Tests to ensure that the lxml tree builder generates good trees."""
-
-import re
-import warnings
-
-try:
-    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
-    LXML_PRESENT = True
-except ImportError, e:
-    LXML_PRESENT = False
-
-from bs4 import (
-    BeautifulSoup,
-    BeautifulStoneSoup,
-    )
-from bs4.element import Comment, Doctype, SoupStrainer
-from bs4.testing import skipIf
-from bs4.tests import test_htmlparser
-from bs4.testing import (
-    HTMLTreeBuilderSmokeTest,
-    XMLTreeBuilderSmokeTest,
-    SoupTest,
-    skipIf,
-)
-
-@skipIf(
-    not LXML_PRESENT,
-    "lxml seems not to be present, not testing its tree builder.")
-class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
-    """See ``HTMLTreeBuilderSmokeTest``."""
-
-    @property
-    def default_builder(self):
-        return LXMLTreeBuilder()
-
-    def test_out_of_range_entity(self):
-        self.assertSoupEquals(
-            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
-        self.assertSoupEquals(
-            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
-        self.assertSoupEquals(
-            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
-
-    def test_beautifulstonesoup_is_xml_parser(self):
-        # Make sure that the deprecated BSS class uses an xml builder
-        # if one is installed.
-        with warnings.catch_warnings(record=False) as w:
-            soup = BeautifulStoneSoup("<b />")
-            self.assertEqual(u"<b/>", unicode(soup.b))
-
-    def test_real_xhtml_document(self):
-        """lxml strips the XML definition from an XHTML doc, which is fine."""
-        markup = b"""<?xml version="1.0" encoding="utf-8"?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head><title>Hello.</title></head>
-<body>Goodbye.</body>
-</html>"""
-        soup = self.soup(markup)
-        self.assertEqual(
-            soup.encode("utf-8").replace(b"\n", b''),
-            markup.replace(b'\n', b'').replace(
-                b'<?xml version="1.0" encoding="utf-8"?>', b''))
-
-
-@skipIf(
-    not LXML_PRESENT,
-    "lxml seems not to be present, not testing its XML tree builder.")
-class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
-    """See ``HTMLTreeBuilderSmokeTest``."""
-
-    @property
-    def default_builder(self):
-        return LXMLTreeBuilderForXML()
-
diff --git a/bs4/tests/test_soup.py b/bs4/tests/test_soup.py
deleted file mode 100644
index 23a664e7..00000000
--- a/bs4/tests/test_soup.py
+++ /dev/null
@@ -1,368 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Tests of Beautiful Soup as a whole."""
-
-import unittest
-from bs4 import (
-    BeautifulSoup,
-    BeautifulStoneSoup,
-)
-from bs4.element import (
-    CharsetMetaAttributeValue,
-    ContentMetaAttributeValue,
-    SoupStrainer,
-    NamespacedAttribute,
-    )
-import bs4.dammit
-from bs4.dammit import EntitySubstitution, UnicodeDammit
-from bs4.testing import (
-    SoupTest,
-    skipIf,
-)
-import warnings
-
-try:
-    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
-    LXML_PRESENT = True
-except ImportError, e:
-    LXML_PRESENT = False
-
-class TestDeprecatedConstructorArguments(SoupTest):
-
-    def test_parseOnlyThese_renamed_to_parse_only(self):
-        with warnings.catch_warnings(record=True) as w:
-            soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
-        msg = str(w[0].message)
-        self.assertTrue("parseOnlyThese" in msg)
-        self.assertTrue("parse_only" in msg)
-        self.assertEqual(b"<b></b>", soup.encode())
-
-    def test_fromEncoding_renamed_to_from_encoding(self):
-        with warnings.catch_warnings(record=True) as w:
-            utf8 = b"\xc3\xa9"
-            soup = self.soup(utf8, fromEncoding="utf8")
-        msg = str(w[0].message)
-        self.assertTrue("fromEncoding" in msg)
-        self.assertTrue("from_encoding" in msg)
-        self.assertEqual("utf8", soup.original_encoding)
-
-    def test_unrecognized_keyword_argument(self):
-        self.assertRaises(
-            TypeError, self.soup, "<a>", no_such_argument=True)
-
-    @skipIf(
-        not LXML_PRESENT,
-        "lxml not present, not testing BeautifulStoneSoup.")
-    def test_beautifulstonesoup(self):
-        with warnings.catch_warnings(record=True) as w:
-            soup = BeautifulStoneSoup("<markup>")
-            self.assertTrue(isinstance(soup, BeautifulSoup))
-            self.assertTrue("BeautifulStoneSoup class is deprecated")
-
-class TestSelectiveParsing(SoupTest):
-
-    def test_parse_with_soupstrainer(self):
-        markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
-        strainer = SoupStrainer("b")
-        soup = self.soup(markup, parse_only=strainer)
-        self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
-
-
-class TestEntitySubstitution(unittest.TestCase):
-    """Standalone tests of the EntitySubstitution class."""
-    def setUp(self):
-        self.sub = EntitySubstitution
-
-    def test_simple_html_substitution(self):
-        # Unicode characters corresponding to named HTML entites
-        # are substituted, and no others.
-        s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
-        self.assertEqual(self.sub.substitute_html(s),
-                          u"foo&forall;\N{SNOWMAN}&otilde;bar")
-
-    def test_smart_quote_substitution(self):
-        # MS smart quotes are a common source of frustration, so we
-        # give them a special test.
-        quotes = b"\x91\x92foo\x93\x94"
-        dammit = UnicodeDammit(quotes)
-        self.assertEqual(self.sub.substitute_html(dammit.markup),
-                          "&lsquo;&rsquo;foo&ldquo;&rdquo;")
-
-    def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
-        s = 'Welcome to "my bar"'
-        self.assertEqual(self.sub.substitute_xml(s, False), s)
-
-    def test_xml_attribute_quoting_normally_uses_double_quotes(self):
-        self.assertEqual(self.sub.substitute_xml("Welcome", True),
-                          '"Welcome"')
-        self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
-                          '"Bob\'s Bar"')
-
-    def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
-        s = 'Welcome to "my bar"'
-        self.assertEqual(self.sub.substitute_xml(s, True),
-                          "'Welcome to \"my bar\"'")
-
-    def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
-        s = 'Welcome to "Bob\'s Bar"'
-        self.assertEqual(
-            self.sub.substitute_xml(s, True),
-            '"Welcome to &quot;Bob\'s Bar&quot;"')
-
-    def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
-        quoted = 'Welcome to "Bob\'s Bar"'
-        self.assertEqual(self.sub.substitute_xml(quoted), quoted)
-
-    def test_xml_quoting_handles_angle_brackets(self):
-        self.assertEqual(
-            self.sub.substitute_xml("foo<bar>"),
-            "foo&lt;bar&gt;")
-
-    def test_xml_quoting_handles_ampersands(self):
-        self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")
-
-    def test_xml_quoting_ignores_ampersands_when_they_are_part_of_an_entity(self):
-        self.assertEqual(
-            self.sub.substitute_xml("&Aacute;T&T"),
-            "&Aacute;T&amp;T")
-
-    def test_quotes_not_html_substituted(self):
-        """There's no need to do this except inside attribute values."""
-        text = 'Bob\'s "bar"'
-        self.assertEqual(self.sub.substitute_html(text), text)
-
-
-class TestEncodingConversion(SoupTest):
-    # Test Beautiful Soup's ability to decode and encode from various
-    # encodings.
-
-    def setUp(self):
-        super(TestEncodingConversion, self).setUp()
-        self.unicode_data = u"<html><head></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>"
-        self.utf8_data = self.unicode_data.encode("utf-8")
-        # Just so you know what it looks like.
-        self.assertEqual(
-            self.utf8_data,
-            b"<html><head></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>")
-
-    def test_ascii_in_unicode_out(self):
-        # ASCII input is converted to Unicode. The original_encoding
-        # attribute is set.
-        ascii = b"<foo>a</foo>"
-        soup_from_ascii = self.soup(ascii)
-        unicode_output = soup_from_ascii.decode()
-        self.assertTrue(isinstance(unicode_output, unicode))
-        self.assertEqual(unicode_output, self.document_for(ascii.decode()))
-        self.assertEqual(soup_from_ascii.original_encoding, "ascii")
-
-    def test_unicode_in_unicode_out(self):
-        # Unicode input is left alone. The original_encoding attribute
-        # is not set.
-        soup_from_unicode = self.soup(self.unicode_data)
-        self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
-        self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
-        self.assertEqual(soup_from_unicode.original_encoding, None)
-
-    def test_utf8_in_unicode_out(self):
-        # UTF-8 input is converted to Unicode. The original_encoding
-        # attribute is set.
-        soup_from_utf8 = self.soup(self.utf8_data)
-        self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
-        self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
-
-    def test_utf8_out(self):
-        # The internal data structures can be encoded as UTF-8.
-        soup_from_unicode = self.soup(self.unicode_data)
-        self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
-
-
-class TestUnicodeDammit(unittest.TestCase):
-    """Standalone tests of Unicode, Dammit."""
-
-    def test_smart_quotes_to_unicode(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup)
-        self.assertEqual(
-            dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
-
-    def test_smart_quotes_to_xml_entities(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="xml")
-        self.assertEqual(
-            dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
-
-    def test_smart_quotes_to_html_entities(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="html")
-        self.assertEqual(
-            dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
-
-    def test_smart_quotes_to_ascii(self):
-        markup = b"<foo>\x91\x92\x93\x94</foo>"
-        dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
-        self.assertEqual(
-            dammit.unicode_markup, """<foo>''""</foo>""")
-
-    def test_detect_utf8(self):
-        utf8 = b"\xc3\xa9"
-        dammit = UnicodeDammit(utf8)
-        self.assertEqual(dammit.unicode_markup, u'\xe9')
-        self.assertEqual(dammit.original_encoding, 'utf-8')
-
-    def test_convert_hebrew(self):
-        hebrew = b"\xed\xe5\xec\xf9"
-        dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
-        self.assertEqual(dammit.original_encoding, 'iso-8859-8')
-        self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
-
-    def test_dont_see_smart_quotes_where_there_are_none(self):
-        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
-        dammit = UnicodeDammit(utf_8)
-        self.assertEqual(dammit.original_encoding, 'utf-8')
-        self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
-
-    def test_ignore_inappropriate_codecs(self):
-        utf8_data = u"Räksmörgås".encode("utf-8")
-        dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
-        self.assertEqual(dammit.original_encoding, 'utf-8')
-
-    def test_ignore_invalid_codecs(self):
-        utf8_data = u"Räksmörgås".encode("utf-8")
-        for bad_encoding in ['.utf8', '...', 'utF---16.!']:
-            dammit = UnicodeDammit(utf8_data, [bad_encoding])
-            self.assertEqual(dammit.original_encoding, 'utf-8')
-
-    def test_detect_html5_style_meta_tag(self):
-
-        for data in (
-            b'<html><meta charset="euc-jp" /></html>',
-            b"<html><meta charset='euc-jp' /></html>",
-            b"<html><meta charset=euc-jp /></html>",
-            b"<html><meta charset=euc-jp/></html>"):
-            dammit = UnicodeDammit(data, is_html=True)
-            self.assertEqual(
-                "euc-jp", dammit.original_encoding)
-
-    def test_last_ditch_entity_replacement(self):
-        # This is a UTF-8 document that contains bytestrings
-        # completely incompatible with UTF-8 (ie. encoded with some other
-        # encoding).
-        #
-        # Since there is no consistent encoding for the document,
-        # Unicode, Dammit will eventually encode the document as UTF-8
-        # and encode the incompatible characters as REPLACEMENT
-        # CHARACTER.
-        #
-        # If chardet is installed, it will detect that the document
-        # can be converted into ISO-8859-1 without errors. This happens
-        # to be the wrong encoding, but it is a consistent encoding, so the
-        # code we're testing here won't run.
-        #
-        # So we temporarily disable chardet if it's present.
-        doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
-<html><b>\330\250\330\252\330\261</b>
-<i>\310\322\321\220\312\321\355\344</i></html>"""
-        chardet = bs4.dammit.chardet
-        try:
-            bs4.dammit.chardet = None
-            with warnings.catch_warnings(record=True) as w:
-                dammit = UnicodeDammit(doc)
-                self.assertEqual(True, dammit.contains_replacement_characters)
-                self.assertTrue(u"\ufffd" in dammit.unicode_markup)
-
-                soup = BeautifulSoup(doc, "html.parser")
-                self.assertTrue(soup.contains_replacement_characters)
-
-                msg = w[0].message
-                self.assertTrue(isinstance(msg, UnicodeWarning))
-                self.assertTrue("Some characters could not be decoded" in str(msg))
-        finally:
-            bs4.dammit.chardet = chardet
-
-    def test_sniffed_xml_encoding(self):
-        # A document written in UTF-16LE will be converted by a different
-        # code path that sniffs the byte order markers.
-        data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
-        dammit = UnicodeDammit(data)
-        self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
-        self.assertEqual("utf-16le", dammit.original_encoding)
-
-    def test_detwingle(self):
-        # Here's a UTF8 document.
-        utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
-
-        # Here's a Windows-1252 document.
-        windows_1252 = (
-            u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
-            u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
-
-        # Through some unholy alchemy, they've been stuck together.
-        doc = utf8 + windows_1252 + utf8
-
-        # The document can't be turned into UTF-8:
-        self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
-
-        # Unicode, Dammit thinks the whole document is Windows-1252,
-        # and decodes it into "â˜ƒâ˜ƒâ˜ƒ“Hi, I like Windows!”â˜ƒâ˜ƒâ˜ƒ"
-
-        # But if we run it through fix_embedded_windows_1252, it's fixed:
-
-        fixed = UnicodeDammit.detwingle(doc)
-        self.assertEqual(
-            u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
-
-    def test_detwingle_ignores_multibyte_characters(self):
-        # Each of these characters has a UTF-8 representation ending
-        # in \x93. \x93 is a smart quote if interpreted as
-        # Windows-1252. But our code knows to skip over multibyte
-        # UTF-8 characters, so they'll survive the process unscathed.
-        for tricky_unicode_char in (
-            u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
-            u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
-            u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
-            ):
-            input = tricky_unicode_char.encode("utf8")
-            self.assertTrue(input.endswith(b'\x93'))
-            output = UnicodeDammit.detwingle(input)
-            self.assertEqual(output, input)
-
-class TestNamedspacedAttribute(SoupTest):
-
-    def test_name_may_be_none(self):
-        a = NamespacedAttribute("xmlns", None)
-        self.assertEqual(a, "xmlns")
-
-    def test_attribute_is_equivalent_to_colon_separated_string(self):
-        a = NamespacedAttribute("a", "b")
-        self.assertEqual("a:b", a)
-
-    def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
-        a = NamespacedAttribute("a", "b", "c")
-        b = NamespacedAttribute("a", "b", "c")
-        self.assertEqual(a, b)
-
-        # The actual namespace is not considered.
-        c = NamespacedAttribute("a", "b", None)
-        self.assertEqual(a, c)
-
-        # But name and prefix are important.
-        d = NamespacedAttribute("a", "z", "c")
-        self.assertNotEqual(a, d)
-
-        e = NamespacedAttribute("z", "b", "c")
-        self.assertNotEqual(a, e)
-
-
-class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
-
-    def test_content_meta_attribute_value(self):
-        value = CharsetMetaAttributeValue("euc-jp")
-        self.assertEqual("euc-jp", value)
-        self.assertEqual("euc-jp", value.original_value)
-        self.assertEqual("utf8", value.encode("utf8"))
-
-
-    def test_content_meta_attribute_value(self):
-        value = ContentMetaAttributeValue("text/html; charset=euc-jp")
-        self.assertEqual("text/html; charset=euc-jp", value)
-        self.assertEqual("text/html; charset=euc-jp", value.original_value)
-        self.assertEqual("text/html; charset=utf8", value.encode("utf8"))
diff --git a/bs4/tests/test_tree.py b/bs4/tests/test_tree.py
deleted file mode 100644
index cc573ede..00000000
--- a/bs4/tests/test_tree.py
+++ /dev/null
@@ -1,1695 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Tests for Beautiful Soup's tree traversal methods.
-
-The tree traversal methods are the main advantage of using Beautiful
-Soup over just using a parser.
-
-Different parsers will build different Beautiful Soup trees given the
-same markup, but all Beautiful Soup trees can be traversed with the
-methods tested here.
-"""
-
-import copy
-import pickle
-import re
-import warnings
-from bs4 import BeautifulSoup
-from bs4.builder import (
-    builder_registry,
-    HTMLParserTreeBuilder,
-)
-from bs4.element import (
-    CData,
-    Doctype,
-    NavigableString,
-    SoupStrainer,
-    Tag,
-)
-from bs4.testing import (
-    SoupTest,
-    skipIf,
-)
-
-XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
-LXML_PRESENT = (builder_registry.lookup("lxml") is not None)
-
-class TreeTest(SoupTest):
-
-    def assertSelects(self, tags, should_match):
-        """Make sure that the given tags have the correct text.
-
-        This is used in tests that define a bunch of tags, each
-        containing a single string, and then select certain strings by
-        some mechanism.
-        """
-        self.assertEqual([tag.string for tag in tags], should_match)
-
-    def assertSelectsIDs(self, tags, should_match):
-        """Make sure that the given tags have the correct IDs.
-
-        This is used in tests that define a bunch of tags, each
-        containing a single string, and then select certain strings by
-        some mechanism.
-        """
-        self.assertEqual([tag['id'] for tag in tags], should_match)
-
-
-class TestFind(TreeTest):
-    """Basic tests of the find() method.
-
-    find() just calls find_all() with limit=1, so it's not tested all
-    that thouroughly here.
-    """
-
-    def test_find_tag(self):
-        soup = self.soup("<a>1</a><b>2</b><a>3</a><b>4</b>")
-        self.assertEqual(soup.find("b").string, "2")
-
-    def test_unicode_text_find(self):
-        soup = self.soup(u'<h1>Räksmörgås</h1>')
-        self.assertEqual(soup.find(text=u'Räksmörgås'), u'Räksmörgås')
-
-class TestFindAll(TreeTest):
-    """Basic tests of the find_all() method."""
-
-    def test_find_all_text_nodes(self):
-        """You can search the tree for text nodes."""
-        soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
-        # Exact match.
-        self.assertEqual(soup.find_all(text="bar"), [u"bar"])
-        # Match any of a number of strings.
-        self.assertEqual(
-            soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"])
-        # Match a regular expression.
-        self.assertEqual(soup.find_all(text=re.compile('.*')),
-                         [u"Foo", u"bar", u'\xbb'])
-        # Match anything.
-        self.assertEqual(soup.find_all(text=True),
-                         [u"Foo", u"bar", u'\xbb'])
-
-    def test_find_all_limit(self):
-        """You can limit the number of items returned by find_all."""
-        soup = self.soup("<a>1</a><a>2</a><a>3</a><a>4</a><a>5</a>")
-        self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"])
-        self.assertSelects(soup.find_all('a', limit=1), ["1"])
-        self.assertSelects(
-            soup.find_all('a', limit=10), ["1", "2", "3", "4", "5"])
-
-        # A limit of 0 means no limit.
-        self.assertSelects(
-            soup.find_all('a', limit=0), ["1", "2", "3", "4", "5"])
-
-    def test_calling_a_tag_is_calling_findall(self):
-        soup = self.soup("<a>1</a><b>2<a id='foo'>3</a></b>")
-        self.assertSelects(soup('a', limit=1), ["1"])
-        self.assertSelects(soup.b(id="foo"), ["3"])
-
-    def test_find_all_with_self_referential_data_structure_does_not_cause_infinite_recursion(self):
-        soup = self.soup("<a></a>")
-        # Create a self-referential list.
-        l = []
-        l.append(l)
-
-        # Without special code in _normalize_search_value, this would cause infinite
-        # recursion.
-        self.assertEqual([], soup.find_all(l))
-
-class TestFindAllBasicNamespaces(TreeTest):
-
-    def test_find_by_namespaced_name(self):
-        soup = self.soup('<mathml:msqrt>4</mathml:msqrt><a svg:fill="red">')
-        self.assertEqual("4", soup.find("mathml:msqrt").string)
-        self.assertEqual("a", soup.find(attrs= { "svg:fill" : "red" }).name)
-
-
-class TestFindAllByName(TreeTest):
-    """Test ways of finding tags by tag name."""
-
-    def setUp(self):
-        super(TreeTest, self).setUp()
-        self.tree =  self.soup("""<a>First tag.</a>
-                                  <b>Second tag.</b>
-                                  <c>Third <a>Nested tag.</a> tag.</c>""")
-
-    def test_find_all_by_tag_name(self):
-        # Find all the <a> tags.
-        self.assertSelects(
-            self.tree.find_all('a'), ['First tag.', 'Nested tag.'])
-
-    def test_find_all_by_name_and_text(self):
-        self.assertSelects(
-            self.tree.find_all('a', text='First tag.'), ['First tag.'])
-
-        self.assertSelects(
-            self.tree.find_all('a', text=True), ['First tag.', 'Nested tag.'])
-
-        self.assertSelects(
-            self.tree.find_all('a', text=re.compile("tag")),
-            ['First tag.', 'Nested tag.'])
-
-
-    def test_find_all_on_non_root_element(self):
-        # You can call find_all on any node, not just the root.
-        self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.'])
-
-    def test_calling_element_invokes_find_all(self):
-        self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.'])
-
-    def test_find_all_by_tag_strainer(self):
-        self.assertSelects(
-            self.tree.find_all(SoupStrainer('a')),
-            ['First tag.', 'Nested tag.'])
-
-    def test_find_all_by_tag_names(self):
-        self.assertSelects(
-            self.tree.find_all(['a', 'b']),
-            ['First tag.', 'Second tag.', 'Nested tag.'])
-
-    def test_find_all_by_tag_dict(self):
-        self.assertSelects(
-            self.tree.find_all({'a' : True, 'b' : True}),
-            ['First tag.', 'Second tag.', 'Nested tag.'])
-
-    def test_find_all_by_tag_re(self):
-        self.assertSelects(
-            self.tree.find_all(re.compile('^[ab]$')),
-            ['First tag.', 'Second tag.', 'Nested tag.'])
-
-    def test_find_all_with_tags_matching_method(self):
-        # You can define an oracle method that determines whether
-        # a tag matches the search.
-        def id_matches_name(tag):
-            return tag.name == tag.get('id')
-
-        tree = self.soup("""<a id="a">Match 1.</a>
-                            <a id="1">Does not match.</a>
-                            <b id="b">Match 2.</a>""")
-
-        self.assertSelects(
-            tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
-
-
-class TestFindAllByAttribute(TreeTest):
-
-    def test_find_all_by_attribute_name(self):
-        # You can pass in keyword arguments to find_all to search by
-        # attribute.
-        tree = self.soup("""
-                         <a id="first">Matching a.</a>
-                         <a id="second">
-                          Non-matching <b id="first">Matching b.</b>a.
-                         </a>""")
-        self.assertSelects(tree.find_all(id='first'),
-                           ["Matching a.", "Matching b."])
-
-    def test_find_all_by_utf8_attribute_value(self):
-        peace = u"םולש".encode("utf8")
-        data = u'<a title="םולש"></a>'.encode("utf8")
-        soup = self.soup(data)
-        self.assertEqual([soup.a], soup.find_all(title=peace))
-        self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
-        self.assertEqual([soup.a], soup.find_all(title=[peace, "something else"]))
-
-    def test_find_all_by_attribute_dict(self):
-        # You can pass in a dictionary as the argument 'attrs'. This
-        # lets you search for attributes like 'name' (a fixed argument
-        # to find_all) and 'class' (a reserved word in Python.)
-        tree = self.soup("""
-                         <a name="name1" class="class1">Name match.</a>
-                         <a name="name2" class="class2">Class match.</a>
-                         <a name="name3" class="class3">Non-match.</a>
-                         <name1>A tag called 'name1'.</name1>
-                         """)
-
-        # This doesn't do what you want.
-        self.assertSelects(tree.find_all(name='name1'),
-                           ["A tag called 'name1'."])
-        # This does what you want.
-        self.assertSelects(tree.find_all(attrs={'name' : 'name1'}),
-                           ["Name match."])
-
-        # Passing class='class2' would cause a syntax error.
-        self.assertSelects(tree.find_all(attrs={'class' : 'class2'}),
-                           ["Class match."])
-
-    def test_find_all_by_class(self):
-        # Passing in a string to 'attrs' will search the CSS class.
-        tree = self.soup("""
-                         <a class="1">Class 1.</a>
-                         <a class="2">Class 2.</a>
-                         <b class="1">Class 1.</b>
-                         <c class="3 4">Class 3 and 4.</c>
-                         """)
-        self.assertSelects(tree.find_all('a', '1'), ['Class 1.'])
-        self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.'])
-        self.assertSelects(tree.find_all('c', '3'), ['Class 3 and 4.'])
-        self.assertSelects(tree.find_all('c', '4'), ['Class 3 and 4.'])
-
-    def test_find_by_class_when_multiple_classes_present(self):
-        tree = self.soup("<gar class='foo bar'>Found it</gar>")
-
-        attrs = { 'class' : re.compile("o") }
-        f = tree.find_all("gar", attrs=attrs)
-        self.assertSelects(f, ["Found it"])
-
-        f = tree.find_all("gar", re.compile("a"))
-        self.assertSelects(f, ["Found it"])
-
-        # Since the class is not the string "foo bar", but the two
-        # strings "foo" and "bar", this will not find anything.
-        attrs = { 'class' : re.compile("o b") }
-        f = tree.find_all("gar", attrs=attrs)
-        self.assertSelects(f, [])
-
-    def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
-        soup = self.soup("<a class='bar'>Found it</a>")
-
-        self.assertSelects(soup.find_all("a", re.compile("ba")), ["Found it"])
-
-        def big_attribute_value(value):
-            return len(value) > 3
-
-        self.assertSelects(soup.find_all("a", big_attribute_value), [])
-
-        def small_attribute_value(value):
-            return len(value) <= 3
-
-        self.assertSelects(
-            soup.find_all("a", small_attribute_value), ["Found it"])
-
-    def test_find_all_with_string_for_attrs_finds_multiple_classes(self):
-        soup = self.soup('<a class="foo bar"></a><a class="foo"></a>')
-        a, a2 = soup.find_all("a")
-        self.assertEqual([a, a2], soup.find_all("a", "foo"))
-        self.assertEqual([a], soup.find_all("a", "bar"))
-
-        # If you specify the attribute as a string that contains a
-        # space, only that specific value will be found.
-        self.assertEqual([a], soup.find_all("a", "foo bar"))
-        self.assertEqual([], soup.find_all("a", "bar foo"))
-
-    def test_find_all_by_attribute_soupstrainer(self):
-        tree = self.soup("""
-                         <a id="first">Match.</a>
-                         <a id="second">Non-match.</a>""")
-
-        strainer = SoupStrainer(attrs={'id' : 'first'})
-        self.assertSelects(tree.find_all(strainer), ['Match.'])
-
-    def test_find_all_with_missing_atribute(self):
-        # You can pass in None as the value of an attribute to find_all.
-        # This will match tags that do not have that attribute set.
-        tree = self.soup("""<a id="1">ID present.</a>
-                            <a>No ID present.</a>
-                            <a id="">ID is empty.</a>""")
-        self.assertSelects(tree.find_all('a', id=None), ["No ID present."])
-
-    def test_find_all_with_defined_attribute(self):
-        # You can pass in None as the value of an attribute to find_all.
-        # This will match tags that have that attribute set to any value.
-        tree = self.soup("""<a id="1">ID present.</a>
-                            <a>No ID present.</a>
-                            <a id="">ID is empty.</a>""")
-        self.assertSelects(
-            tree.find_all(id=True), ["ID present.", "ID is empty."])
-
-    def test_find_all_with_numeric_attribute(self):
-        # If you search for a number, it's treated as a string.
-        tree = self.soup("""<a id=1>Unquoted attribute.</a>
-                            <a id="1">Quoted attribute.</a>""")
-
-        expected = ["Unquoted attribute.", "Quoted attribute."]
-        self.assertSelects(tree.find_all(id=1), expected)
-        self.assertSelects(tree.find_all(id="1"), expected)
-
-    def test_find_all_with_list_attribute_values(self):
-        # You can pass a list of attribute values instead of just one,
-        # and you'll get tags that match any of the values.
-        tree = self.soup("""<a id="1">1</a>
-                            <a id="2">2</a>
-                            <a id="3">3</a>
-                            <a>No ID.</a>""")
-        self.assertSelects(tree.find_all(id=["1", "3", "4"]),
-                           ["1", "3"])
-
-    def test_find_all_with_regular_expression_attribute_value(self):
-        # You can pass a regular expression as an attribute value, and
-        # you'll get tags whose values for that attribute match the
-        # regular expression.
-        tree = self.soup("""<a id="a">One a.</a>
-                            <a id="aa">Two as.</a>
-                            <a id="ab">Mixed as and bs.</a>
-                            <a id="b">One b.</a>
-                            <a>No ID.</a>""")
-
-        self.assertSelects(tree.find_all(id=re.compile("^a+$")),
-                           ["One a.", "Two as."])
-
-    def test_find_by_name_and_containing_string(self):
-        soup = self.soup("<b>foo</b><b>bar</b><a>foo</a>")
-        a = soup.a
-
-        self.assertEqual([a], soup.find_all("a", text="foo"))
-        self.assertEqual([], soup.find_all("a", text="bar"))
-        self.assertEqual([], soup.find_all("a", text="bar"))
-
-    def test_find_by_name_and_containing_string_when_string_is_buried(self):
-        soup = self.soup("<a>foo</a><a><b><c>foo</c></b></a>")
-        self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo"))
-
-    def test_find_by_attribute_and_containing_string(self):
-        soup = self.soup('<b id="1">foo</b><a id="2">foo</a>')
-        a = soup.a
-
-        self.assertEqual([a], soup.find_all(id=2, text="foo"))
-        self.assertEqual([], soup.find_all(id=1, text="bar"))
-
-
-
-
-class TestIndex(TreeTest):
-    """Test Tag.index"""
-    def test_index(self):
-        tree = self.soup("""<div>
-                            <a>Identical</a>
-                            <b>Not identical</b>
-                            <a>Identical</a>
-
-                            <c><d>Identical with child</d></c>
-                            <b>Also not identical</b>
-                            <c><d>Identical with child</d></c>
-                            </div>""")
-        div = tree.div
-        for i, element in enumerate(div.contents):
-            self.assertEqual(i, div.index(element))
-        self.assertRaises(ValueError, tree.index, 1)
-
-
-class TestParentOperations(TreeTest):
-    """Test navigation and searching through an element's parents."""
-
-    def setUp(self):
-        super(TestParentOperations, self).setUp()
-        self.tree = self.soup('''<ul id="empty"></ul>
-                                 <ul id="top">
-                                  <ul id="middle">
-                                   <ul id="bottom">
-                                    <b>Start here</b>
-                                   </ul>
-                                  </ul>''')
-        self.start = self.tree.b
-
-
-    def test_parent(self):
-        self.assertEqual(self.start.parent['id'], 'bottom')
-        self.assertEqual(self.start.parent.parent['id'], 'middle')
-        self.assertEqual(self.start.parent.parent.parent['id'], 'top')
-
-    def test_parent_of_top_tag_is_soup_object(self):
-        top_tag = self.tree.contents[0]
-        self.assertEqual(top_tag.parent, self.tree)
-
-    def test_soup_object_has_no_parent(self):
-        self.assertEqual(None, self.tree.parent)
-
-    def test_find_parents(self):
-        self.assertSelectsIDs(
-            self.start.find_parents('ul'), ['bottom', 'middle', 'top'])
-        self.assertSelectsIDs(
-            self.start.find_parents('ul', id="middle"), ['middle'])
-
-    def test_find_parent(self):
-        self.assertEqual(self.start.find_parent('ul')['id'], 'bottom')
-
-    def test_parent_of_text_element(self):
-        text = self.tree.find(text="Start here")
-        self.assertEqual(text.parent.name, 'b')
-
-    def test_text_element_find_parent(self):
-        text = self.tree.find(text="Start here")
-        self.assertEqual(text.find_parent('ul')['id'], 'bottom')
-
-    def test_parent_generator(self):
-        parents = [parent['id'] for parent in self.start.parents
-                   if parent is not None and 'id' in parent.attrs]
-        self.assertEqual(parents, ['bottom', 'middle', 'top'])
-
-
-class ProximityTest(TreeTest):
-
-    def setUp(self):
-        super(TreeTest, self).setUp()
-        self.tree = self.soup(
-            '<html id="start"><head></head><body><b id="1">One</b><b id="2">Two</b><b id="3">Three</b></body></html>')
-
-
-class TestNextOperations(ProximityTest):
-
-    def setUp(self):
-        super(TestNextOperations, self).setUp()
-        self.start = self.tree.b
-
-    def test_next(self):
-        self.assertEqual(self.start.next_element, "One")
-        self.assertEqual(self.start.next_element.next_element['id'], "2")
-
-    def test_next_of_last_item_is_none(self):
-        last = self.tree.find(text="Three")
-        self.assertEqual(last.next_element, None)
-
-    def test_next_of_root_is_none(self):
-        # The document root is outside the next/previous chain.
-        self.assertEqual(self.tree.next_element, None)
-
-    def test_find_all_next(self):
-        self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
-        self.start.find_all_next(id=3)
-        self.assertSelects(self.start.find_all_next(id=3), ["Three"])
-
-    def test_find_next(self):
-        self.assertEqual(self.start.find_next('b')['id'], '2')
-        self.assertEqual(self.start.find_next(text="Three"), "Three")
-
-    def test_find_next_for_text_element(self):
-        text = self.tree.find(text="One")
-        self.assertEqual(text.find_next("b").string, "Two")
-        self.assertSelects(text.find_all_next("b"), ["Two", "Three"])
-
-    def test_next_generator(self):
-        start = self.tree.find(text="Two")
-        successors = [node for node in start.next_elements]
-        # There are two successors: the final <b> tag and its text contents.
-        tag, contents = successors
-        self.assertEqual(tag['id'], '3')
-        self.assertEqual(contents, "Three")
-
-class TestPreviousOperations(ProximityTest):
-
-    def setUp(self):
-        super(TestPreviousOperations, self).setUp()
-        self.end = self.tree.find(text="Three")
-
-    def test_previous(self):
-        self.assertEqual(self.end.previous_element['id'], "3")
-        self.assertEqual(self.end.previous_element.previous_element, "Two")
-
-    def test_previous_of_first_item_is_none(self):
-        first = self.tree.find('html')
-        self.assertEqual(first.previous_element, None)
-
-    def test_previous_of_root_is_none(self):
-        # The document root is outside the next/previous chain.
-        # XXX This is broken!
-        #self.assertEqual(self.tree.previous_element, None)
-        pass
-
-    def test_find_all_previous(self):
-        # The <b> tag containing the "Three" node is the predecessor
-        # of the "Three" node itself, which is why "Three" shows up
-        # here.
-        self.assertSelects(
-            self.end.find_all_previous('b'), ["Three", "Two", "One"])
-        self.assertSelects(self.end.find_all_previous(id=1), ["One"])
-
-    def test_find_previous(self):
-        self.assertEqual(self.end.find_previous('b')['id'], '3')
-        self.assertEqual(self.end.find_previous(text="One"), "One")
-
-    def test_find_previous_for_text_element(self):
-        text = self.tree.find(text="Three")
-        self.assertEqual(text.find_previous("b").string, "Three")
-        self.assertSelects(
-            text.find_all_previous("b"), ["Three", "Two", "One"])
-
-    def test_previous_generator(self):
-        start = self.tree.find(text="One")
-        predecessors = [node for node in start.previous_elements]
-
-        # There are four predecessors: the <b> tag containing "One"
-        # the <body> tag, the <head> tag, and the <html> tag.
-        b, body, head, html = predecessors
-        self.assertEqual(b['id'], '1')
-        self.assertEqual(body.name, "body")
-        self.assertEqual(head.name, "head")
-        self.assertEqual(html.name, "html")
-
-
-class SiblingTest(TreeTest):
-
-    def setUp(self):
-        super(SiblingTest, self).setUp()
-        markup = '''<html>
-                    <span id="1">
-                     <span id="1.1"></span>
-                    </span>
-                    <span id="2">
-                     <span id="2.1"></span>
-                    </span>
-                    <span id="3">
-                     <span id="3.1"></span>
-                    </span>
-                    <span id="4"></span>
-                    </html>'''
-        # All that whitespace looks good but makes the tests more
-        # difficult. Get rid of it.
-        markup = re.compile("\n\s*").sub("", markup)
-        self.tree = self.soup(markup)
-
-
-class TestNextSibling(SiblingTest):
-
-    def setUp(self):
-        super(TestNextSibling, self).setUp()
-        self.start = self.tree.find(id="1")
-
-    def test_next_sibling_of_root_is_none(self):
-        self.assertEqual(self.tree.next_sibling, None)
-
-    def test_next_sibling(self):
-        self.assertEqual(self.start.next_sibling['id'], '2')
-        self.assertEqual(self.start.next_sibling.next_sibling['id'], '3')
-
-        # Note the difference between next_sibling and next_element.
-        self.assertEqual(self.start.next_element['id'], '1.1')
-
-    def test_next_sibling_may_not_exist(self):
-        self.assertEqual(self.tree.html.next_sibling, None)
-
-        nested_span = self.tree.find(id="1.1")
-        self.assertEqual(nested_span.next_sibling, None)
-
-        last_span = self.tree.find(id="4")
-        self.assertEqual(last_span.next_sibling, None)
-
-    def test_find_next_sibling(self):
-        self.assertEqual(self.start.find_next_sibling('span')['id'], '2')
-
-    def test_next_siblings(self):
-        self.assertSelectsIDs(self.start.find_next_siblings("span"),
-                              ['2', '3', '4'])
-
-        self.assertSelectsIDs(self.start.find_next_siblings(id='3'), ['3'])
-
-    def test_next_sibling_for_text_element(self):
-        soup = self.soup("Foo<b>bar</b>baz")
-        start = soup.find(text="Foo")
-        self.assertEqual(start.next_sibling.name, 'b')
-        self.assertEqual(start.next_sibling.next_sibling, 'baz')
-
-        self.assertSelects(start.find_next_siblings('b'), ['bar'])
-        self.assertEqual(start.find_next_sibling(text="baz"), "baz")
-        self.assertEqual(start.find_next_sibling(text="nonesuch"), None)
-
-
-class TestPreviousSibling(SiblingTest):
-
-    def setUp(self):
-        super(TestPreviousSibling, self).setUp()
-        self.end = self.tree.find(id="4")
-
-    def test_previous_sibling_of_root_is_none(self):
-        self.assertEqual(self.tree.previous_sibling, None)
-
-    def test_previous_sibling(self):
-        self.assertEqual(self.end.previous_sibling['id'], '3')
-        self.assertEqual(self.end.previous_sibling.previous_sibling['id'], '2')
-
-        # Note the difference between previous_sibling and previous_element.
-        self.assertEqual(self.end.previous_element['id'], '3.1')
-
-    def test_previous_sibling_may_not_exist(self):
-        self.assertEqual(self.tree.html.previous_sibling, None)
-
-        nested_span = self.tree.find(id="1.1")
-        self.assertEqual(nested_span.previous_sibling, None)
-
-        first_span = self.tree.find(id="1")
-        self.assertEqual(first_span.previous_sibling, None)
-
-    def test_find_previous_sibling(self):
-        self.assertEqual(self.end.find_previous_sibling('span')['id'], '3')
-
-    def test_previous_siblings(self):
-        self.assertSelectsIDs(self.end.find_previous_siblings("span"),
-                              ['3', '2', '1'])
-
-        self.assertSelectsIDs(self.end.find_previous_siblings(id='1'), ['1'])
-
-    def test_previous_sibling_for_text_element(self):
-        soup = self.soup("Foo<b>bar</b>baz")
-        start = soup.find(text="baz")
-        self.assertEqual(start.previous_sibling.name, 'b')
-        self.assertEqual(start.previous_sibling.previous_sibling, 'Foo')
-
-        self.assertSelects(start.find_previous_siblings('b'), ['bar'])
-        self.assertEqual(start.find_previous_sibling(text="Foo"), "Foo")
-        self.assertEqual(start.find_previous_sibling(text="nonesuch"), None)
-
-
-class TestTagCreation(SoupTest):
-    """Test the ability to create new tags."""
-    def test_new_tag(self):
-        soup = self.soup("")
-        new_tag = soup.new_tag("foo", bar="baz")
-        self.assertTrue(isinstance(new_tag, Tag))
-        self.assertEqual("foo", new_tag.name)
-        self.assertEqual(dict(bar="baz"), new_tag.attrs)
-        self.assertEqual(None, new_tag.parent)
-
-    def test_tag_inherits_self_closing_rules_from_builder(self):
-        if XML_BUILDER_PRESENT:
-            xml_soup = BeautifulSoup("", "xml")
-            xml_br = xml_soup.new_tag("br")
-            xml_p = xml_soup.new_tag("p")
-
-            # Both the <br> and <p> tag are empty-element, just because
-            # they have no contents.
-            self.assertEqual(b"<br/>", xml_br.encode())
-            self.assertEqual(b"<p/>", xml_p.encode())
-
-        html_soup = BeautifulSoup("", "html")
-        html_br = html_soup.new_tag("br")
-        html_p = html_soup.new_tag("p")
-
-        # The HTML builder users HTML's rules about which tags are
-        # empty-element tags, and the new tags reflect these rules.
-        self.assertEqual(b"<br/>", html_br.encode())
-        self.assertEqual(b"<p></p>", html_p.encode())
-
-    def test_new_string_creates_navigablestring(self):
-        soup = self.soup("")
-        s = soup.new_string("foo")
-        self.assertEqual("foo", s)
-        self.assertTrue(isinstance(s, NavigableString))
-
-class TestTreeModification(SoupTest):
-
-    def test_attribute_modification(self):
-        soup = self.soup('<a id="1"></a>')
-        soup.a['id'] = 2
-        self.assertEqual(soup.decode(), self.document_for('<a id="2"></a>'))
-        del(soup.a['id'])
-        self.assertEqual(soup.decode(), self.document_for('<a></a>'))
-        soup.a['id2'] = 'foo'
-        self.assertEqual(soup.decode(), self.document_for('<a id2="foo"></a>'))
-
-    def test_new_tag_creation(self):
-        builder = builder_registry.lookup('html')()
-        soup = self.soup("<body></body>", builder=builder)
-        a = Tag(soup, builder, 'a')
-        ol = Tag(soup, builder, 'ol')
-        a['href'] = 'http://foo.com/'
-        soup.body.insert(0, a)
-        soup.body.insert(1, ol)
-        self.assertEqual(
-            soup.body.encode(),
-            b'<body><a href="http://foo.com/"></a><ol></ol></body>')
-
-    def test_append_to_contents_moves_tag(self):
-        doc = """<p id="1">Don't leave me <b>here</b>.</p>
-                <p id="2">Don\'t leave!</p>"""
-        soup = self.soup(doc)
-        second_para = soup.find(id='2')
-        bold = soup.b
-
-        # Move the <b> tag to the end of the second paragraph.
-        soup.find(id='2').append(soup.b)
-
-        # The <b> tag is now a child of the second paragraph.
-        self.assertEqual(bold.parent, second_para)
-
-        self.assertEqual(
-            soup.decode(), self.document_for(
-                '<p id="1">Don\'t leave me .</p>\n'
-                '<p id="2">Don\'t leave!<b>here</b></p>'))
-
-    def test_replace_with_returns_thing_that_was_replaced(self):
-        text = "<a></a><b><c></c></b>"
-        soup = self.soup(text)
-        a = soup.a
-        new_a = a.replace_with(soup.c)
-        self.assertEqual(a, new_a)
-
-    def test_unwrap_returns_thing_that_was_replaced(self):
-        text = "<a><b></b><c></c></a>"
-        soup = self.soup(text)
-        a = soup.a
-        new_a = a.unwrap()
-        self.assertEqual(a, new_a)
-
-    def test_replace_tag_with_itself(self):
-        text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>"
-        soup = self.soup(text)
-        c = soup.c
-        soup.c.replace_with(c)
-        self.assertEqual(soup.decode(), self.document_for(text))
-
-    def test_replace_tag_with_its_parent_raises_exception(self):
-        text = "<a><b></b></a>"
-        soup = self.soup(text)
-        self.assertRaises(ValueError, soup.b.replace_with, soup.a)
-
-    def test_insert_tag_into_itself_raises_exception(self):
-        text = "<a><b></b></a>"
-        soup = self.soup(text)
-        self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
-
-    def test_replace_with_maintains_next_element_throughout(self):
-        soup = self.soup('<p><a>one</a><b>three</b></p>')
-        a = soup.a
-        b = a.contents[0]
-        # Make it so the <a> tag has two text children.
-        a.insert(1, "two")
-
-        # Now replace each one with the empty string.
-        left, right = a.contents
-        left.replaceWith('')
-        right.replaceWith('')
-
-        # The <b> tag is still connected to the tree.
-        self.assertEqual("three", soup.b.string)
-
-    def test_replace_final_node(self):
-        soup = self.soup("<b>Argh!</b>")
-        soup.find(text="Argh!").replace_with("Hooray!")
-        new_text = soup.find(text="Hooray!")
-        b = soup.b
-        self.assertEqual(new_text.previous_element, b)
-        self.assertEqual(new_text.parent, b)
-        self.assertEqual(new_text.previous_element.next_element, new_text)
-        self.assertEqual(new_text.next_element, None)
-
-    def test_consecutive_text_nodes(self):
-        # A builder should never create two consecutive text nodes,
-        # but if you insert one next to another, Beautiful Soup will
-        # handle it correctly.
-        soup = self.soup("<a><b>Argh!</b><c></c></a>")
-        soup.b.insert(1, "Hooray!")
-
-        self.assertEqual(
-            soup.decode(), self.document_for(
-                "<a><b>Argh!Hooray!</b><c></c></a>"))
-
-        new_text = soup.find(text="Hooray!")
-        self.assertEqual(new_text.previous_element, "Argh!")
-        self.assertEqual(new_text.previous_element.next_element, new_text)
-
-        self.assertEqual(new_text.previous_sibling, "Argh!")
-        self.assertEqual(new_text.previous_sibling.next_sibling, new_text)
-
-        self.assertEqual(new_text.next_sibling, None)
-        self.assertEqual(new_text.next_element, soup.c)
-
-    def test_insert_string(self):
-        soup = self.soup("<a></a>")
-        soup.a.insert(0, "bar")
-        soup.a.insert(0, "foo")
-        # The string were added to the tag.
-        self.assertEqual(["foo", "bar"], soup.a.contents)
-        # And they were converted to NavigableStrings.
-        self.assertEqual(soup.a.contents[0].next_element, "bar")
-
-    def test_insert_tag(self):
-        builder = self.default_builder
-        soup = self.soup(
-            "<a><b>Find</b><c>lady!</c><d></d></a>", builder=builder)
-        magic_tag = Tag(soup, builder, 'magictag')
-        magic_tag.insert(0, "the")
-        soup.a.insert(1, magic_tag)
-
-        self.assertEqual(
-            soup.decode(), self.document_for(
-                "<a><b>Find</b><magictag>the</magictag><c>lady!</c><d></d></a>"))
-
-        # Make sure all the relationships are hooked up correctly.
-        b_tag = soup.b
-        self.assertEqual(b_tag.next_sibling, magic_tag)
-        self.assertEqual(magic_tag.previous_sibling, b_tag)
-
-        find = b_tag.find(text="Find")
-        self.assertEqual(find.next_element, magic_tag)
-        self.assertEqual(magic_tag.previous_element, find)
-
-        c_tag = soup.c
-        self.assertEqual(magic_tag.next_sibling, c_tag)
-        self.assertEqual(c_tag.previous_sibling, magic_tag)
-
-        the = magic_tag.find(text="the")
-        self.assertEqual(the.parent, magic_tag)
-        self.assertEqual(the.next_element, c_tag)
-        self.assertEqual(c_tag.previous_element, the)
-
-    def test_append_child_thats_already_at_the_end(self):
-        data = "<a><b></b></a>"
-        soup = self.soup(data)
-        soup.a.append(soup.b)
-        self.assertEqual(data, soup.decode())
-
-    def test_move_tag_to_beginning_of_parent(self):
-        data = "<a><b></b><c></c><d></d></a>"
-        soup = self.soup(data)
-        soup.a.insert(0, soup.d)
-        self.assertEqual("<a><d></d><b></b><c></c></a>", soup.decode())
-
-    def test_insert_works_on_empty_element_tag(self):
-        # This is a little strange, since most HTML parsers don't allow
-        # markup like this to come through. But in general, we don't
-        # know what the parser would or wouldn't have allowed, so
-        # I'm letting this succeed for now.
-        soup = self.soup("<br/>")
-        soup.br.insert(1, "Contents")
-        self.assertEqual(str(soup.br), "<br>Contents</br>")
-
-    def test_insert_before(self):
-        soup = self.soup("<a>foo</a><b>bar</b>")
-        soup.b.insert_before("BAZ")
-        soup.a.insert_before("QUUX")
-        self.assertEqual(
-            soup.decode(), self.document_for("QUUX<a>foo</a>BAZ<b>bar</b>"))
-
-        soup.a.insert_before(soup.b)
-        self.assertEqual(
-            soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
-
-    def test_insert_after(self):
-        soup = self.soup("<a>foo</a><b>bar</b>")
-        soup.b.insert_after("BAZ")
-        soup.a.insert_after("QUUX")
-        self.assertEqual(
-            soup.decode(), self.document_for("<a>foo</a>QUUX<b>bar</b>BAZ"))
-        soup.b.insert_after(soup.a)
-        self.assertEqual(
-            soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
-
-    def test_insert_after_raises_valueerror_if_after_has_no_meaning(self):
-        soup = self.soup("")
-        tag = soup.new_tag("a")
-        string = soup.new_string("")
-        self.assertRaises(ValueError, string.insert_after, tag)
-        self.assertRaises(ValueError, soup.insert_after, tag)
-        self.assertRaises(ValueError, tag.insert_after, tag)
-
-    def test_insert_before_raises_valueerror_if_before_has_no_meaning(self):
-        soup = self.soup("")
-        tag = soup.new_tag("a")
-        string = soup.new_string("")
-        self.assertRaises(ValueError, string.insert_before, tag)
-        self.assertRaises(ValueError, soup.insert_before, tag)
-        self.assertRaises(ValueError, tag.insert_before, tag)
-
-    def test_replace_with(self):
-        soup = self.soup(
-                "<p>There's <b>no</b> business like <b>show</b> business</p>")
-        no, show = soup.find_all('b')
-        show.replace_with(no)
-        self.assertEqual(
-            soup.decode(),
-            self.document_for(
-                "<p>There's  business like <b>no</b> business</p>"))
-
-        self.assertEqual(show.parent, None)
-        self.assertEqual(no.parent, soup.p)
-        self.assertEqual(no.next_element, "no")
-        self.assertEqual(no.next_sibling, " business")
-
-    def test_replace_first_child(self):
-        data = "<a><b></b><c></c></a>"
-        soup = self.soup(data)
-        soup.b.replace_with(soup.c)
-        self.assertEqual("<a><c></c></a>", soup.decode())
-
-    def test_replace_last_child(self):
-        data = "<a><b></b><c></c></a>"
-        soup = self.soup(data)
-        soup.c.replace_with(soup.b)
-        self.assertEqual("<a><b></b></a>", soup.decode())
-
-    def test_nested_tag_replace_with(self):
-        soup = self.soup(
-            """<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""")
-
-        # Replace the entire <b> tag and its contents ("reserve the
-        # right") with the <f> tag ("refuse").
-        remove_tag = soup.b
-        move_tag = soup.f
-        remove_tag.replace_with(move_tag)
-
-        self.assertEqual(
-            soup.decode(), self.document_for(
-                "<a>We<f>refuse</f></a><e>to<g>service</g></e>"))
-
-        # The <b> tag is now an orphan.
-        self.assertEqual(remove_tag.parent, None)
-        self.assertEqual(remove_tag.find(text="right").next_element, None)
-        self.assertEqual(remove_tag.previous_element, None)
-        self.assertEqual(remove_tag.next_sibling, None)
-        self.assertEqual(remove_tag.previous_sibling, None)
-
-        # The <f> tag is now connected to the <a> tag.
-        self.assertEqual(move_tag.parent, soup.a)
-        self.assertEqual(move_tag.previous_element, "We")
-        self.assertEqual(move_tag.next_element.next_element, soup.e)
-        self.assertEqual(move_tag.next_sibling, None)
-
-        # The gap where the <f> tag used to be has been mended, and
-        # the word "to" is now connected to the <g> tag.
-        to_text = soup.find(text="to")
-        g_tag = soup.g
-        self.assertEqual(to_text.next_element, g_tag)
-        self.assertEqual(to_text.next_sibling, g_tag)
-        self.assertEqual(g_tag.previous_element, to_text)
-        self.assertEqual(g_tag.previous_sibling, to_text)
-
-    def test_unwrap(self):
-        tree = self.soup("""
-            <p>Unneeded <em>formatting</em> is unneeded</p>
-            """)
-        tree.em.unwrap()
-        self.assertEqual(tree.em, None)
-        self.assertEqual(tree.p.text, "Unneeded formatting is unneeded")
-
-    def test_wrap(self):
-        soup = self.soup("I wish I was bold.")
-        value = soup.string.wrap(soup.new_tag("b"))
-        self.assertEqual(value.decode(), "<b>I wish I was bold.</b>")
-        self.assertEqual(
-            soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
-
-    def test_wrap_extracts_tag_from_elsewhere(self):
-        soup = self.soup("<b></b>I wish I was bold.")
-        soup.b.next_sibling.wrap(soup.b)
-        self.assertEqual(
-            soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
-
-    def test_wrap_puts_new_contents_at_the_end(self):
-        soup = self.soup("<b>I like being bold.</b>I wish I was bold.")
-        soup.b.next_sibling.wrap(soup.b)
-        self.assertEqual(2, len(soup.b.contents))
-        self.assertEqual(
-            soup.decode(), self.document_for(
-                "<b>I like being bold.I wish I was bold.</b>"))
-
-    def test_extract(self):
-        soup = self.soup(
-            '<html><body>Some content. <div id="nav">Nav crap</div> More content.</body></html>')
-
-        self.assertEqual(len(soup.body.contents), 3)
-        extracted = soup.find(id="nav").extract()
-
-        self.assertEqual(
-            soup.decode(), "<html><body>Some content.  More content.</body></html>")
-        self.assertEqual(extracted.decode(), '<div id="nav">Nav crap</div>')
-
-        # The extracted tag is now an orphan.
-        self.assertEqual(len(soup.body.contents), 2)
-        self.assertEqual(extracted.parent, None)
-        self.assertEqual(extracted.previous_element, None)
-        self.assertEqual(extracted.next_element.next_element, None)
-
-        # The gap where the extracted tag used to be has been mended.
-        content_1 = soup.find(text="Some content. ")
-        content_2 = soup.find(text=" More content.")
-        self.assertEqual(content_1.next_element, content_2)
-        self.assertEqual(content_1.next_sibling, content_2)
-        self.assertEqual(content_2.previous_element, content_1)
-        self.assertEqual(content_2.previous_sibling, content_1)
-
-    def test_extract_distinguishes_between_identical_strings(self):
-        soup = self.soup("<a>foo</a><b>bar</b>")
-        foo_1 = soup.a.string
-        bar_1 = soup.b.string
-        foo_2 = soup.new_string("foo")
-        bar_2 = soup.new_string("bar")
-        soup.a.append(foo_2)
-        soup.b.append(bar_2)
-
-        # Now there are two identical strings in the <a> tag, and two
-        # in the <b> tag. Let's remove the first "foo" and the second
-        # "bar".
-        foo_1.extract()
-        bar_2.extract()
-        self.assertEqual(foo_2, soup.a.string)
-        self.assertEqual(bar_2, soup.b.string)
-
-    def test_clear(self):
-        """Tag.clear()"""
-        soup = self.soup("<p><a>String <em>Italicized</em></a> and another</p>")
-        # clear using extract()
-        a = soup.a
-        soup.p.clear()
-        self.assertEqual(len(soup.p.contents), 0)
-        self.assertTrue(hasattr(a, "contents"))
-
-        # clear using decompose()
-        em = a.em
-        a.clear(decompose=True)
-        self.assertFalse(hasattr(em, "contents"))
-
-    def test_string_set(self):
-        """Tag.string = 'string'"""
-        soup = self.soup("<a></a> <b><c></c></b>")
-        soup.a.string = "foo"
-        self.assertEqual(soup.a.contents, ["foo"])
-        soup.b.string = "bar"
-        self.assertEqual(soup.b.contents, ["bar"])
-
-    def test_string_set_does_not_affect_original_string(self):
-        soup = self.soup("<a><b>foo</b><c>bar</c>")
-        soup.b.string = soup.c.string
-        self.assertEqual(soup.a.encode(), b"<a><b>bar</b><c>bar</c></a>")
-
-    def test_set_string_preserves_class_of_string(self):
-        soup = self.soup("<a></a>")
-        cdata = CData("foo")
-        soup.a.string = cdata
-        self.assertTrue(isinstance(soup.a.string, CData))
-
-class TestElementObjects(SoupTest):
-    """Test various features of element objects."""
-
-    def test_len(self):
-        """The length of an element is its number of children."""
-        soup = self.soup("<top>1<b>2</b>3</top>")
-
-        # The BeautifulSoup object itself contains one element: the
-        # <top> tag.
-        self.assertEqual(len(soup.contents), 1)
-        self.assertEqual(len(soup), 1)
-
-        # The <top> tag contains three elements: the text node "1", the
-        # <b> tag, and the text node "3".
-        self.assertEqual(len(soup.top), 3)
-        self.assertEqual(len(soup.top.contents), 3)
-
-    def test_member_access_invokes_find(self):
-        """Accessing a Python member .foo invokes find('foo')"""
-        soup = self.soup('<b><i></i></b>')
-        self.assertEqual(soup.b, soup.find('b'))
-        self.assertEqual(soup.b.i, soup.find('b').find('i'))
-        self.assertEqual(soup.a, None)
-
-    def test_deprecated_member_access(self):
-        soup = self.soup('<b><i></i></b>')
-        with warnings.catch_warnings(record=True) as w:
-            tag = soup.bTag
-        self.assertEqual(soup.b, tag)
-        self.assertEqual(
-            '.bTag is deprecated, use .find("b") instead.',
-            str(w[0].message))
-
-    def test_has_attr(self):
-        """has_attr() checks for the presence of an attribute.
-
-        Please note note: has_attr() is different from
-        __in__. has_attr() checks the tag's attributes and __in__
-        checks the tag's chidlren.
-        """
-        soup = self.soup("<foo attr='bar'>")
-        self.assertTrue(soup.foo.has_attr('attr'))
-        self.assertFalse(soup.foo.has_attr('attr2'))
-
-
-    def test_attributes_come_out_in_alphabetical_order(self):
-        markup = '<b a="1" z="5" m="3" f="2" y="4"></b>'
-        self.assertSoupEquals(markup, '<b a="1" f="2" m="3" y="4" z="5"></b>')
-
-    def test_string(self):
-        # A tag that contains only a text node makes that node
-        # available as .string.
-        soup = self.soup("<b>foo</b>")
-        self.assertEqual(soup.b.string, 'foo')
-
-    def test_empty_tag_has_no_string(self):
-        # A tag with no children has no .stirng.
-        soup = self.soup("<b></b>")
-        self.assertEqual(soup.b.string, None)
-
-    def test_tag_with_multiple_children_has_no_string(self):
-        # A tag with no children has no .string.
-        soup = self.soup("<a>foo<b></b><b></b></b>")
-        self.assertEqual(soup.b.string, None)
-
-        soup = self.soup("<a>foo<b></b>bar</b>")
-        self.assertEqual(soup.b.string, None)
-
-        # Even if all the children are strings, due to trickery,
-        # it won't work--but this would be a good optimization.
-        soup = self.soup("<a>foo</b>")
-        soup.a.insert(1, "bar")
-        self.assertEqual(soup.a.string, None)
-
-    def test_tag_with_recursive_string_has_string(self):
-        # A tag with a single child which has a .string inherits that
-        # .string.
-        soup = self.soup("<a><b>foo</b></a>")
-        self.assertEqual(soup.a.string, "foo")
-        self.assertEqual(soup.string, "foo")
-
-    def test_lack_of_string(self):
-        """Only a tag containing a single text node has a .string."""
-        soup = self.soup("<b>f<i>e</i>o</b>")
-        self.assertFalse(soup.b.string)
-
-        soup = self.soup("<b></b>")
-        self.assertFalse(soup.b.string)
-
-    def test_all_text(self):
-        """Tag.text and Tag.get_text(sep=u"") -> all child text, concatenated"""
-        soup = self.soup("<a>a<b>r</b>   <r> t </r></a>")
-        self.assertEqual(soup.a.text, "ar  t ")
-        self.assertEqual(soup.a.get_text(strip=True), "art")
-        self.assertEqual(soup.a.get_text(","), "a,r, , t ")
-        self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")
-
-class TestCDAtaListAttributes(SoupTest):
-
-    """Testing cdata-list attributes like 'class'.
-    """
-    def test_single_value_becomes_list(self):
-        soup = self.soup("<a class='foo'>")
-        self.assertEqual(["foo"],soup.a['class'])
-
-    def test_multiple_values_becomes_list(self):
-        soup = self.soup("<a class='foo bar'>")
-        self.assertEqual(["foo", "bar"], soup.a['class'])
-
-    def test_multiple_values_separated_by_weird_whitespace(self):
-        soup = self.soup("<a class='foo\tbar\nbaz'>")
-        self.assertEqual(["foo", "bar", "baz"],soup.a['class'])
-
-    def test_attributes_joined_into_string_on_output(self):
-        soup = self.soup("<a class='foo\tbar'>")
-        self.assertEqual(b'<a class="foo bar"></a>', soup.a.encode())
-
-    def test_accept_charset(self):
-        soup = self.soup('<form accept-charset="ISO-8859-1 UTF-8">')
-        self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset'])
-
-    def test_cdata_attribute_applying_only_to_one_tag(self):
-        data = '<a accept-charset="ISO-8859-1 UTF-8"></a>'
-        soup = self.soup(data)
-        # We saw in another test that accept-charset is a cdata-list
-        # attribute for the <form> tag. But it's not a cdata-list
-        # attribute for any other tag.
-        self.assertEqual('ISO-8859-1 UTF-8', soup.a['accept-charset'])
-
-
-class TestPersistence(SoupTest):
-    "Testing features like pickle and deepcopy."
-
-    def setUp(self):
-        super(TestPersistence, self).setUp()
-        self.page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
-"http://www.w3.org/TR/REC-html40/transitional.dtd">
-<html>
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
-<title>Beautiful Soup: We called him Tortoise because he taught us.</title>
-<link rev="made" href="mailto:leonardr@segfault.org">
-<meta name="Description" content="Beautiful Soup: an HTML parser optimized for screen-scraping.">
-<meta name="generator" content="Markov Approximation 1.4 (module: leonardr)">
-<meta name="author" content="Leonard Richardson">
-</head>
-<body>
-<a href="foo">foo</a>
-<a href="foo"><b>bar</b></a>
-</body>
-</html>"""
-        self.tree = self.soup(self.page)
-
-    def test_pickle_and_unpickle_identity(self):
-        # Pickling a tree, then unpickling it, yields a tree identical
-        # to the original.
-        dumped = pickle.dumps(self.tree, 2)
-        loaded = pickle.loads(dumped)
-        self.assertEqual(loaded.__class__, BeautifulSoup)
-        self.assertEqual(loaded.decode(), self.tree.decode())
-
-    def test_deepcopy_identity(self):
-        # Making a deepcopy of a tree yields an identical tree.
-        copied = copy.deepcopy(self.tree)
-        self.assertEqual(copied.decode(), self.tree.decode())
-
-    def test_unicode_pickle(self):
-        # A tree containing Unicode characters can be pickled.
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
-        loaded = pickle.loads(dumped)
-        self.assertEqual(loaded.decode(), soup.decode())
-
-
-class TestSubstitutions(SoupTest):
-
-    def test_default_formatter_is_minimal(self):
-        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
-        soup = self.soup(markup)
-        decoded = soup.decode(formatter="minimal")
-        # The < is converted back into &lt; but the e-with-acute is left alone.
-        self.assertEqual(
-            decoded,
-            self.document_for(
-                u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
-
-    def test_formatter_html(self):
-        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
-        soup = self.soup(markup)
-        decoded = soup.decode(formatter="html")
-        self.assertEqual(
-            decoded,
-            self.document_for("<b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))
-
-    def test_formatter_minimal(self):
-        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
-        soup = self.soup(markup)
-        decoded = soup.decode(formatter="minimal")
-        # The < is converted back into &lt; but the e-with-acute is left alone.
-        self.assertEqual(
-            decoded,
-            self.document_for(
-                u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))
-
-    def test_formatter_null(self):
-        markup = u"<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
-        soup = self.soup(markup)
-        decoded = soup.decode(formatter=None)
-        # Neither the angle brackets nor the e-with-acute are converted.
-        # This is not valid HTML, but it's what the user wanted.
-        self.assertEqual(decoded,
-                          self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))
-
-    def test_formatter_custom(self):
-        markup = u"<b>&lt;foo&gt;</b><b>bar</b>"
-        soup = self.soup(markup)
-        decoded = soup.decode(formatter = lambda x: x.upper())
-        # Instead of normal entity conversion code, the custom
-        # callable is called on every string.
-        self.assertEqual(
-            decoded,
-            self.document_for(u"<b><FOO></b><b>BAR</b>"))
-
-    def test_formatter_is_run_on_attribute_values(self):
-        markup = u'<a href="http://a.com?a=b&c=é">e</a>'
-        soup = self.soup(markup)
-        a = soup.a
-
-        expect_minimal = u'<a href="http://a.com?a=b&amp;c=é">e</a>'
-
-        self.assertEqual(expect_minimal, a.decode())
-        self.assertEqual(expect_minimal, a.decode(formatter="minimal"))
-
-        expect_html = u'<a href="http://a.com?a=b&amp;c=&eacute;">e</a>'
-        self.assertEqual(expect_html, a.decode(formatter="html"))
-
-        self.assertEqual(markup, a.decode(formatter=None))
-        expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>'
-        self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))
-
-    def test_prettify_accepts_formatter(self):
-        soup = BeautifulSoup("<html><body>foo</body></html>")
-        pretty = soup.prettify(formatter = lambda x: x.upper())
-        self.assertTrue("FOO" in pretty)
-
-    def test_prettify_outputs_unicode_by_default(self):
-        soup = self.soup("<a></a>")
-        self.assertEqual(unicode, type(soup.prettify()))
-
-    def test_prettify_can_encode_data(self):
-        soup = self.soup("<a></a>")
-        self.assertEqual(bytes, type(soup.prettify("utf-8")))
-
-    def test_html_entity_substitution_off_by_default(self):
-        markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
-        soup = self.soup(markup)
-        encoded = soup.b.encode("utf-8")
-        self.assertEqual(encoded, markup.encode('utf-8'))
-
-    def test_encoding_substitution(self):
-        # Here's the <meta> tag saying that a document is
-        # encoded in Shift-JIS.
-        meta_tag = ('<meta content="text/html; charset=x-sjis" '
-                    'http-equiv="Content-type"/>')
-        soup = self.soup(meta_tag)
-
-        # Parse the document, and the charset apprears unchanged.
-        self.assertEqual(soup.meta['content'], 'text/html; charset=x-sjis')
-
-        # Encode the document into some encoding, and the encoding is
-        # substituted into the meta tag.
-        utf_8 = soup.encode("utf-8")
-        self.assertTrue(b"charset=utf-8" in utf_8)
-
-        euc_jp = soup.encode("euc_jp")
-        self.assertTrue(b"charset=euc_jp" in euc_jp)
-
-        shift_jis = soup.encode("shift-jis")
-        self.assertTrue(b"charset=shift-jis" in shift_jis)
-
-        utf_16_u = soup.encode("utf-16").decode("utf-16")
-        self.assertTrue("charset=utf-16" in utf_16_u)
-
-    def test_encoding_substitution_doesnt_happen_if_tag_is_strained(self):
-        markup = ('<head><meta content="text/html; charset=x-sjis" '
-                    'http-equiv="Content-type"/></head><pre>foo</pre>')
-
-        # Beautiful Soup used to try to rewrite the meta tag even if the
-        # meta tag got filtered out by the strainer. This test makes
-        # sure that doesn't happen.
-        strainer = SoupStrainer('pre')
-        soup = self.soup(markup, parse_only=strainer)
-        self.assertEqual(soup.contents[0].name, 'pre')
-
-class TestEncoding(SoupTest):
-    """Test the ability to encode objects into strings."""
-
-    def test_unicode_string_can_be_encoded(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(soup.b.string.encode("utf-8"),
-                          u"\N{SNOWMAN}".encode("utf-8"))
-
-    def test_tag_containing_unicode_string_can_be_encoded(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(
-            soup.b.encode("utf-8"), html.encode("utf-8"))
-
-    def test_encoding_substitutes_unrecognized_characters_by_default(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(soup.b.encode("ascii"), b"<b>&#9731;</b>")
-
-    def test_encoding_can_be_made_strict(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertRaises(
-            UnicodeEncodeError, soup.encode, "ascii", errors="strict")
-
-    def test_decode_contents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents())
-
-    def test_encode_contents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(
-            u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
-                encoding="utf8"))
-
-    def test_deprecated_renderContents(self):
-        html = u"<b>\N{SNOWMAN}</b>"
-        soup = self.soup(html)
-        self.assertEqual(
-            u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())
-
-class TestNavigableStringSubclasses(SoupTest):
-
-    def test_cdata(self):
-        # None of the current builders turn CDATA sections into CData
-        # objects, but you can create them manually.
-        soup = self.soup("")
-        cdata = CData("foo")
-        soup.insert(1, cdata)
-        self.assertEqual(str(soup), "<![CDATA[foo]]>")
-        self.assertEqual(soup.find(text="foo"), "foo")
-        self.assertEqual(soup.contents[0], "foo")
-
-    def test_cdata_is_never_formatted(self):
-        """Text inside a CData object is passed into the formatter.
-
-        But the return value is ignored.
-        """
-
-        self.count = 0
-        def increment(*args):
-            self.count += 1
-            return "BITTER FAILURE"
-
-        soup = self.soup("")
-        cdata = CData("<><><>")
-        soup.insert(1, cdata)
-        self.assertEqual(
-            b"<![CDATA[<><><>]]>", soup.encode(formatter=increment))
-        self.assertEqual(1, self.count)
-
-    def test_doctype_ends_in_newline(self):
-        # Unlike other NavigableString subclasses, a DOCTYPE always ends
-        # in a newline.
-        doctype = Doctype("foo")
-        soup = self.soup("")
-        soup.insert(1, doctype)
-        self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n")
-
-
-class TestSoupSelector(TreeTest):
-
-    HTML = """
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-"http://www.w3.org/TR/html4/strict.dtd">
-<html>
-<head>
-<title>The title</title>
-<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
-</head>
-<body>
-
-<div id="main">
-<div id="inner">
-<h1 id="header1">An H1</h1>
-<p>Some text</p>
-<p class="onep" id="p1">Some more text</p>
-<h2 id="header2">An H2</h2>
-<p class="class1 class2 class3" id="pmulti">Another</p>
-<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
-<h2 id="header3">Another H2</h2>
-<a id="me" href="http://simonwillison.net/" rel="me">me</a>
-<span class="s1">
-<a href="#" id="s1a1">span1a1</a>
-<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
-<span class="span2">
-<a href="#" id="s2a1">span2a1</a>
-</span>
-<span class="span3"></span>
-</span>
-</div>
-<p lang="en" id="lang-en">English</p>
-<p lang="en-gb" id="lang-en-gb">English UK</p>
-<p lang="en-us" id="lang-en-us">English US</p>
-<p lang="fr" id="lang-fr">French</p>
-</div>
-
-<div id="footer">
-</div>
-"""
-
-    def setUp(self):
-        self.soup = BeautifulSoup(self.HTML)
-
-    def assertSelects(self, selector, expected_ids):
-        el_ids = [el['id'] for el in self.soup.select(selector)]
-        el_ids.sort()
-        expected_ids.sort()
-        self.assertEqual(expected_ids, el_ids,
-            "Selector %s, expected [%s], got [%s]" % (
-                selector, ', '.join(expected_ids), ', '.join(el_ids)
-            )
-        )
-
-    assertSelect = assertSelects
-
-    def assertSelectMultiple(self, *tests):
-        for selector, expected_ids in tests:
-            self.assertSelect(selector, expected_ids)
-
-    def test_one_tag_one(self):
-        els = self.soup.select('title')
-        self.assertEqual(len(els), 1)
-        self.assertEqual(els[0].name, 'title')
-        self.assertEqual(els[0].contents, [u'The title'])
-
-    def test_one_tag_many(self):
-        els = self.soup.select('div')
-        self.assertEqual(len(els), 3)
-        for div in els:
-            self.assertEqual(div.name, 'div')
-
-    def test_tag_in_tag_one(self):
-        els = self.soup.select('div div')
-        self.assertSelects('div div', ['inner'])
-
-    def test_tag_in_tag_many(self):
-        for selector in ('html div', 'html body div', 'body div'):
-            self.assertSelects(selector, ['main', 'inner', 'footer'])
-
-    def test_tag_no_match(self):
-        self.assertEqual(len(self.soup.select('del')), 0)
-
-    def test_invalid_tag(self):
-        self.assertEqual(len(self.soup.select('tag%t')), 0)
-
-    def test_header_tags(self):
-        self.assertSelectMultiple(
-            ('h1', ['header1']),
-            ('h2', ['header2', 'header3']),
-        )
-
-    def test_class_one(self):
-        for selector in ('.onep', 'p.onep', 'html p.onep'):
-            els = self.soup.select(selector)
-            self.assertEqual(len(els), 1)
-            self.assertEqual(els[0].name, 'p')
-            self.assertEqual(els[0]['class'], ['onep'])
-
-    def test_class_mismatched_tag(self):
-        els = self.soup.select('div.onep')
-        self.assertEqual(len(els), 0)
-
-    def test_one_id(self):
-        for selector in ('div#inner', '#inner', 'div div#inner'):
-            self.assertSelects(selector, ['inner'])
-
-    def test_bad_id(self):
-        els = self.soup.select('#doesnotexist')
-        self.assertEqual(len(els), 0)
-
-    def test_items_in_id(self):
-        els = self.soup.select('div#inner p')
-        self.assertEqual(len(els), 3)
-        for el in els:
-            self.assertEqual(el.name, 'p')
-        self.assertEqual(els[1]['class'], ['onep'])
-        self.assertFalse(els[0].has_key('class'))
-
-    def test_a_bunch_of_emptys(self):
-        for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
-            self.assertEqual(len(self.soup.select(selector)), 0)
-
-    def test_multi_class_support(self):
-        for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
-            '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
-            self.assertSelects(selector, ['pmulti'])
-
-    def test_multi_class_selection(self):
-        for selector in ('.class1.class3', '.class3.class2',
-                         '.class1.class2.class3'):
-            self.assertSelects(selector, ['pmulti'])
-
-    def test_child_selector(self):
-        self.assertSelects('.s1 > a', ['s1a1', 's1a2'])
-        self.assertSelects('.s1 > a span', ['s1a2s1'])
-
-    def test_attribute_equals(self):
-        self.assertSelectMultiple(
-            ('p[class="onep"]', ['p1']),
-            ('p[id="p1"]', ['p1']),
-            ('[class="onep"]', ['p1']),
-            ('[id="p1"]', ['p1']),
-            ('link[rel="stylesheet"]', ['l1']),
-            ('link[type="text/css"]', ['l1']),
-            ('link[href="blah.css"]', ['l1']),
-            ('link[href="no-blah.css"]', []),
-            ('[rel="stylesheet"]', ['l1']),
-            ('[type="text/css"]', ['l1']),
-            ('[href="blah.css"]', ['l1']),
-            ('[href="no-blah.css"]', []),
-            ('p[href="no-blah.css"]', []),
-            ('[href="no-blah.css"]', []),
-        )
-
-    def test_attribute_tilde(self):
-        self.assertSelectMultiple(
-            ('p[class~="class1"]', ['pmulti']),
-            ('p[class~="class2"]', ['pmulti']),
-            ('p[class~="class3"]', ['pmulti']),
-            ('[class~="class1"]', ['pmulti']),
-            ('[class~="class2"]', ['pmulti']),
-            ('[class~="class3"]', ['pmulti']),
-            ('a[rel~="friend"]', ['bob']),
-            ('a[rel~="met"]', ['bob']),
-            ('[rel~="friend"]', ['bob']),
-            ('[rel~="met"]', ['bob']),
-        )
-
-    def test_attribute_startswith(self):
-        self.assertSelectMultiple(
-            ('[rel^="style"]', ['l1']),
-            ('link[rel^="style"]', ['l1']),
-            ('notlink[rel^="notstyle"]', []),
-            ('[rel^="notstyle"]', []),
-            ('link[rel^="notstyle"]', []),
-            ('link[href^="bla"]', ['l1']),
-            ('a[href^="http://"]', ['bob', 'me']),
-            ('[href^="http://"]', ['bob', 'me']),
-            ('[id^="p"]', ['pmulti', 'p1']),
-            ('[id^="m"]', ['me', 'main']),
-            ('div[id^="m"]', ['main']),
-            ('a[id^="m"]', ['me']),
-        )
-
-    def test_attribute_endswith(self):
-        self.assertSelectMultiple(
-            ('[href$=".css"]', ['l1']),
-            ('link[href$=".css"]', ['l1']),
-            ('link[id$="1"]', ['l1']),
-            ('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1']),
-            ('div[id$="1"]', []),
-            ('[id$="noending"]', []),
-        )
-
-    def test_attribute_contains(self):
-        self.assertSelectMultiple(
-            # From test_attribute_startswith
-            ('[rel*="style"]', ['l1']),
-            ('link[rel*="style"]', ['l1']),
-            ('notlink[rel*="notstyle"]', []),
-            ('[rel*="notstyle"]', []),
-            ('link[rel*="notstyle"]', []),
-            ('link[href*="bla"]', ['l1']),
-            ('a[href*="http://"]', ['bob', 'me']),
-            ('[href*="http://"]', ['bob', 'me']),
-            ('[id*="p"]', ['pmulti', 'p1']),
-            ('div[id*="m"]', ['main']),
-            ('a[id*="m"]', ['me']),
-            # From test_attribute_endswith
-            ('[href*=".css"]', ['l1']),
-            ('link[href*=".css"]', ['l1']),
-            ('link[id*="1"]', ['l1']),
-            ('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1']),
-            ('div[id*="1"]', []),
-            ('[id*="noending"]', []),
-            # New for this test
-            ('[href*="."]', ['bob', 'me', 'l1']),
-            ('a[href*="."]', ['bob', 'me']),
-            ('link[href*="."]', ['l1']),
-            ('div[id*="n"]', ['main', 'inner']),
-            ('div[id*="nn"]', ['inner']),
-        )
-
-    def test_attribute_exact_or_hypen(self):
-        self.assertSelectMultiple(
-            ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
-            ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
-            ('p[lang|="fr"]', ['lang-fr']),
-            ('p[lang|="gb"]', []),
-        )
-
-    def test_attribute_exists(self):
-        self.assertSelectMultiple(
-            ('[rel]', ['l1', 'bob', 'me']),
-            ('link[rel]', ['l1']),
-            ('a[rel]', ['bob', 'me']),
-            ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
-            ('p[class]', ['p1', 'pmulti']),
-            ('[blah]', []),
-            ('p[blah]', []),
-        )
-
-    def test_select_on_element(self):
-        # Other tests operate on the tree; this operates on an element
-        # within the tree.
-        inner = self.soup.find("div", id="main")
-        selected = inner.select("div")
-        # The <div id="inner"> tag was selected. The <div id="footer">
-        # tag was not.
-        self.assertSelectsIDs(selected, ['inner'])
diff --git a/data/interfaces/default/base.html b/data/interfaces/default/base.html
index 4fc8590b..152f9311 100644
--- a/data/interfaces/default/base.html
+++ b/data/interfaces/default/base.html
@@ -36,7 +36,7 @@
             </div>
             % elif headphones.CURRENT_VERSION != headphones.LATEST_VERSION and headphones.INSTALL_TYPE != 'win':
             <div id="updatebar">
-                A <a href="http://github.com/rembo10/headphones/compare/${headphones.CURRENT_VERSION}...${headphones.LATEST_VERSION}"> newer version</a> is available. You're ${headphones.COMMITS_BEHIND} commits behind. <a href="update">Update</a> or <a href="#" onclick="$('#updatebar').slideUp('slow');">Close</a>
+                A <a href="https://github.com/AdeHub/headphones/compare/${headphones.CURRENT_VERSION}...${headphones.LATEST_VERSION}"> newer version</a> is available. You're ${headphones.COMMITS_BEHIND} commits behind. <a href="update">Update</a> or <a href="#" onclick="$('#updatebar').slideUp('slow');">Close</a>
             </div>
 			% endif
 	
diff --git a/data/interfaces/default/config.html b/data/interfaces/default/config.html
index d599471b..bc50b794 100644
--- a/data/interfaces/default/config.html
+++ b/data/interfaces/default/config.html
@@ -312,7 +312,7 @@ m<%inherit file="base.html"/>
                             </div>
                             <div class="row">
                                 <label>rutracker Password: </label>
-                                <input type="text" name="rutracker_password" value="${config['rutracker_password']}" size="36">
+                                <input type="password" name="rutracker_password" value="${config['rutracker_password']}" size="36">
                             </div>
                         </div>
                     </fieldset>
diff --git a/headphones/versioncheck.py b/headphones/versioncheck.py
index 429ec8b9..e1440fd8 100644
--- a/headphones/versioncheck.py
+++ b/headphones/versioncheck.py
@@ -20,7 +20,7 @@ from headphones import logger, version
 
 import lib.simplejson as simplejson
 
-user = "rembo10"
+user = "AdeHub"
 branch = "master"
 
 def runGit(args):
diff --git a/html5lib/__init__.py b/html5lib/__init__.py
new file mode 100644
index 00000000..16537aad
--- /dev/null
+++ b/html5lib/__init__.py
@@ -0,0 +1,17 @@
+"""
+HTML parsing library based on the WHATWG "HTML5"
+specification. The parser is designed to be compatible with existing
+HTML found in the wild and implements well-defined error recovery that
+is largely compatible with modern desktop web browsers.
+
+Example usage:
+
+import html5lib
+f = open("my_document.html")
+tree = html5lib.parse(f)
+"""
+__version__ = "0.95-dev"
+from html5parser import HTMLParser, parse, parseFragment
+from treebuilders import getTreeBuilder
+from treewalkers import getTreeWalker
+from serializer import serialize
diff --git a/html5lib/constants.py b/html5lib/constants.py
new file mode 100644
index 00000000..b533018e
--- /dev/null
+++ b/html5lib/constants.py
@@ -0,0 +1,3085 @@
+import string, gettext
+_ = gettext.gettext
+
+try:
+    frozenset
+except NameError:
+    # Python 2.3 fallback: take both set types from the sets module
+    from sets import ImmutableSet as frozenset
+    from sets import Set as set
+
+EOF = None
+
+E = {  # parse-error code -> translatable message template
+    "null-character":
+       _(u"Null character in input stream, replaced with U+FFFD."),
+    "invalid-codepoint":
+       _(u"Invalid codepoint in stream."),
+    "incorrectly-placed-solidus":
+       _(u"Solidus (/) incorrectly placed in tag."),
+    "incorrect-cr-newline-entity":
+       _(u"Incorrect CR newline entity, replaced with LF."),
+    "illegal-windows-1252-entity":
+       _(u"Entity used with illegal number (windows-1252 reference)."),
+    "cant-convert-numeric-entity":
+       _(u"Numeric entity couldn't be converted to character "
+         u"(codepoint U+%(charAsInt)08x)."),
+    "illegal-codepoint-for-numeric-entity":
+       _(u"Numeric entity represents an illegal codepoint: "
+         u"U+%(charAsInt)08x."),
+    "numeric-entity-without-semicolon":
+       _(u"Numeric entity didn't end with ';'."),
+    "expected-numeric-entity-but-got-eof":
+       _(u"Numeric entity expected. Got end of file instead."),
+    "expected-numeric-entity":
+       _(u"Numeric entity expected but none found."),
+    "named-entity-without-semicolon":
+       _(u"Named entity didn't end with ';'."),
+    "expected-named-entity":
+       _(u"Named entity expected. Got none."),
+    "attributes-in-end-tag":
+       _(u"End tag contains unexpected attributes."),
+    'self-closing-flag-on-end-tag':
+        _(u"End tag contains unexpected self-closing flag."),
+    "expected-tag-name-but-got-right-bracket":
+       _(u"Expected tag name. Got '>' instead."),
+    "expected-tag-name-but-got-question-mark":
+       _(u"Expected tag name. Got '?' instead. (HTML doesn't "
+         u"support processing instructions.)"),
+    "expected-tag-name":
+       _(u"Expected tag name. Got something else instead"),
+    "expected-closing-tag-but-got-right-bracket":
+       _(u"Expected closing tag. Got '>' instead. Ignoring '</>'."),
+    "expected-closing-tag-but-got-eof":
+       _(u"Expected closing tag. Unexpected end of file."),
+    "expected-closing-tag-but-got-char":
+       _(u"Expected closing tag. Unexpected character '%(data)s' found."),
+    "eof-in-tag-name":
+       _(u"Unexpected end of file in the tag name."),
+    "expected-attribute-name-but-got-eof":
+       _(u"Unexpected end of file. Expected attribute name instead."),
+    "eof-in-attribute-name":
+       _(u"Unexpected end of file in attribute name."),
+    "invalid-character-in-attribute-name":
+        _(u"Invalid character in attribute name"),
+    "duplicate-attribute":
+       _(u"Dropped duplicate attribute on tag."),
+    "expected-end-of-tag-name-but-got-eof":
+       _(u"Unexpected end of file. Expected = or end of tag."),
+    "expected-attribute-value-but-got-eof":
+       _(u"Unexpected end of file. Expected attribute value."),
+    "expected-attribute-value-but-got-right-bracket":
+       _(u"Expected attribute value. Got '>' instead."),
+    'equals-in-unquoted-attribute-value':
+        _(u"Unexpected = in unquoted attribute"),
+    'unexpected-character-in-unquoted-attribute-value':
+        _(u"Unexpected character in unquoted attribute"),
+    "invalid-character-after-attribute-name":
+       _(u"Unexpected character after attribute name."),
+    "unexpected-character-after-attribute-value":
+       _(u"Unexpected character after attribute value."),
+    "eof-in-attribute-value-double-quote":
+       _(u"Unexpected end of file in attribute value (\")."),
+    "eof-in-attribute-value-single-quote":
+       _(u"Unexpected end of file in attribute value (')."),
+    "eof-in-attribute-value-no-quotes":
+       _(u"Unexpected end of file in attribute value."),
+    "unexpected-EOF-after-solidus-in-tag":
+        _(u"Unexpected end of file in tag. Expected >"),
+    "unexpected-character-after-soldius-in-tag":  # NOTE(review): "soldius" typo kept; the key is presumably looked up verbatim elsewhere
+        _(u"Unexpected character after / in tag. Expected >"),
+    "expected-dashes-or-doctype":
+       _(u"Expected '--' or 'DOCTYPE'. Not found."),
+    "unexpected-bang-after-double-dash-in-comment":
+        _(u"Unexpected ! after -- in comment"),
+    "unexpected-space-after-double-dash-in-comment":
+        _(u"Unexpected space after -- in comment"),
+    "incorrect-comment":
+       _(u"Incorrect comment."),
+    "eof-in-comment":
+       _(u"Unexpected end of file in comment."),
+    "eof-in-comment-end-dash":
+       _(u"Unexpected end of file in comment (-)"),
+    "unexpected-dash-after-double-dash-in-comment":
+       _(u"Unexpected '-' after '--' found in comment."),
+    "eof-in-comment-double-dash":
+       _(u"Unexpected end of file in comment (--)."),
+    "eof-in-comment-end-space-state":
+       _(u"Unexpected end of file in comment."),
+    "eof-in-comment-end-bang-state":
+       _(u"Unexpected end of file in comment."),
+    "unexpected-char-in-comment":
+       _(u"Unexpected character in comment found."),
+    "need-space-after-doctype":
+       _(u"No space after literal string 'DOCTYPE'."),
+    "expected-doctype-name-but-got-right-bracket":
+       _(u"Unexpected > character. Expected DOCTYPE name."),
+    "expected-doctype-name-but-got-eof":
+       _(u"Unexpected end of file. Expected DOCTYPE name."),
+    "eof-in-doctype-name":
+       _(u"Unexpected end of file in DOCTYPE name."),
+    "eof-in-doctype":
+       _(u"Unexpected end of file in DOCTYPE."),
+    "expected-space-or-right-bracket-in-doctype":
+       _(u"Expected space or '>'. Got '%(data)s'"),
+    "unexpected-end-of-doctype":
+       _(u"Unexpected end of DOCTYPE."),
+    "unexpected-char-in-doctype":
+       _(u"Unexpected character in DOCTYPE."),
+    "eof-in-innerhtml":
+       _(u"XXX innerHTML EOF"),
+    "unexpected-doctype":
+       _(u"Unexpected DOCTYPE. Ignored."),
+    "non-html-root":
+       _(u"html needs to be the first start tag."),
+    "expected-doctype-but-got-eof":
+       _(u"Unexpected End of file. Expected DOCTYPE."),
+    "unknown-doctype":
+       _(u"Erroneous DOCTYPE."),
+    "expected-doctype-but-got-chars":
+       _(u"Unexpected non-space characters. Expected DOCTYPE."),
+    "expected-doctype-but-got-start-tag":
+       _(u"Unexpected start tag (%(name)s). Expected DOCTYPE."),
+    "expected-doctype-but-got-end-tag":
+       _(u"Unexpected end tag (%(name)s). Expected DOCTYPE."),
+    "end-tag-after-implied-root":
+       _(u"Unexpected end tag (%(name)s) after the (implied) root element."),
+    "expected-named-closing-tag-but-got-eof":
+       _(u"Unexpected end of file. Expected end tag (%(name)s)."),
+    "two-heads-are-not-better-than-one":
+       _(u"Unexpected start tag head in existing head. Ignored."),
+    "unexpected-end-tag":
+       _(u"Unexpected end tag (%(name)s). Ignored."),
+    "unexpected-start-tag-out-of-my-head":
+       _(u"Unexpected start tag (%(name)s) that can be in head. Moved."),
+    "unexpected-start-tag":
+       _(u"Unexpected start tag (%(name)s)."),
+    "missing-end-tag":
+       _(u"Missing end tag (%(name)s)."),
+    "missing-end-tags":
+       _(u"Missing end tags (%(name)s)."),
+    "unexpected-start-tag-implies-end-tag":
+       _(u"Unexpected start tag (%(startName)s) "
+         u"implies end tag (%(endName)s)."),
+    "unexpected-start-tag-treated-as":
+       _(u"Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
+    "deprecated-tag":
+       _(u"Unexpected start tag %(name)s. Don't use it!"),
+    "unexpected-start-tag-ignored":
+       _(u"Unexpected start tag %(name)s. Ignored."),
+    "expected-one-end-tag-but-got-another":
+       _(u"Unexpected end tag (%(gotName)s). "
+         u"Missing end tag (%(expectedName)s)."),
+    "end-tag-too-early":
+       _(u"End tag (%(name)s) seen too early. Expected other end tag."),
+    "end-tag-too-early-named":
+       _(u"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
+    "end-tag-too-early-ignored":
+       _(u"End tag (%(name)s) seen too early. Ignored."),
+    "adoption-agency-1.1":
+       _(u"End tag (%(name)s) violates step 1, "
+         u"paragraph 1 of the adoption agency algorithm."),
+    "adoption-agency-1.2":
+       _(u"End tag (%(name)s) violates step 1, "
+         u"paragraph 2 of the adoption agency algorithm."),
+    "adoption-agency-1.3":
+       _(u"End tag (%(name)s) violates step 1, "
+         u"paragraph 3 of the adoption agency algorithm."),
+    "unexpected-end-tag-treated-as":
+       _(u"Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
+    "no-end-tag":
+       _(u"This element (%(name)s) has no end tag."),
+    "unexpected-implied-end-tag-in-table":
+       _(u"Unexpected implied end tag (%(name)s) in the table phase."),
+    "unexpected-implied-end-tag-in-table-body":
+       _(u"Unexpected implied end tag (%(name)s) in the table body phase."),
+    "unexpected-char-implies-table-voodoo":
+       _(u"Unexpected non-space characters in "
+         u"table context caused voodoo mode."),
+    "unexpected-hidden-input-in-table":
+       _(u"Unexpected input with type hidden in table context."),
+    "unexpected-form-in-table":
+       _(u"Unexpected form in table context."),
+    "unexpected-start-tag-implies-table-voodoo":
+       _(u"Unexpected start tag (%(name)s) in "
+         u"table context caused voodoo mode."),
+    "unexpected-end-tag-implies-table-voodoo":
+       _(u"Unexpected end tag (%(name)s) in "
+         u"table context caused voodoo mode."),
+    "unexpected-cell-in-table-body":
+       _(u"Unexpected table cell start tag (%(name)s) "
+         u"in the table body phase."),
+    "unexpected-cell-end-tag":
+       _(u"Got table cell end tag (%(name)s) "
+         u"while required end tags are missing."),
+    "unexpected-end-tag-in-table-body":
+       _(u"Unexpected end tag (%(name)s) in the table body phase. Ignored."),
+    "unexpected-implied-end-tag-in-table-row":
+       _(u"Unexpected implied end tag (%(name)s) in the table row phase."),
+    "unexpected-end-tag-in-table-row":
+       _(u"Unexpected end tag (%(name)s) in the table row phase. Ignored."),
+    "unexpected-select-in-select":
+       _(u"Unexpected select start tag in the select phase "
+         u"treated as select end tag."),
+    "unexpected-input-in-select":
+       _(u"Unexpected input start tag in the select phase."),
+    "unexpected-start-tag-in-select":
+       _(u"Unexpected start tag token (%(name)s) in the select phase. "
+         u"Ignored."),
+    "unexpected-end-tag-in-select":
+       _(u"Unexpected end tag (%(name)s) in the select phase. Ignored."),
+    "unexpected-table-element-start-tag-in-select-in-table":
+       _(u"Unexpected table element start tag (%(name)s) in the select in table phase."),
+    "unexpected-table-element-end-tag-in-select-in-table":
+       _(u"Unexpected table element end tag (%(name)s) in the select in table phase."),
+    "unexpected-char-after-body":
+       _(u"Unexpected non-space characters in the after body phase."),
+    "unexpected-start-tag-after-body":
+       _(u"Unexpected start tag token (%(name)s)"
+         u" in the after body phase."),
+    "unexpected-end-tag-after-body":
+       _(u"Unexpected end tag token (%(name)s)"
+         u" in the after body phase."),
+    "unexpected-char-in-frameset":
+       _(u"Unexpected characters in the frameset phase. Characters ignored."),
+    "unexpected-start-tag-in-frameset":
+       _(u"Unexpected start tag token (%(name)s)"
+         u" in the frameset phase. Ignored."),
+    "unexpected-frameset-in-frameset-innerhtml":
+       _(u"Unexpected end tag token (frameset) "
+         u"in the frameset phase (innerHTML)."),
+    "unexpected-end-tag-in-frameset":
+       _(u"Unexpected end tag token (%(name)s)"
+         u" in the frameset phase. Ignored."),
+    "unexpected-char-after-frameset":
+       _(u"Unexpected non-space characters in the "
+         u"after frameset phase. Ignored."),
+    "unexpected-start-tag-after-frameset":
+       _(u"Unexpected start tag (%(name)s)"
+         u" in the after frameset phase. Ignored."),
+    "unexpected-end-tag-after-frameset":
+       _(u"Unexpected end tag (%(name)s)"
+         u" in the after frameset phase. Ignored."),
+    "unexpected-end-tag-after-body-innerhtml":
+       _(u"Unexpected end tag after body(innerHtml)"),
+    "expected-eof-but-got-char":
+       _(u"Unexpected non-space characters. Expected end of file."),
+    "expected-eof-but-got-start-tag":
+       _(u"Unexpected start tag (%(name)s)"
+         u". Expected end of file."),
+    "expected-eof-but-got-end-tag":
+       _(u"Unexpected end tag (%(name)s)"
+         u". Expected end of file."),
+    "eof-in-table":
+       _(u"Unexpected end of file. Expected table content."),
+    "eof-in-select":
+       _(u"Unexpected end of file. Expected select content."),
+    "eof-in-frameset":
+       _(u"Unexpected end of file. Expected frameset content."),
+    "eof-in-script-in-script":
+       _(u"Unexpected end of file. Expected script content."),
+    "eof-in-foreign-lands":
+       _(u"Unexpected end of file. Expected foreign content"),
+    "non-void-element-with-trailing-solidus":
+       _(u"Trailing solidus not allowed on element %(name)s"),
+    "unexpected-html-element-in-foreign-content":
+       _(u"Element %(name)s not allowed in a non-html context"),
+    "unexpected-end-tag-before-html":
+        _(u"Unexpected end tag (%(name)s) before html."),
+    "XXX-undefined-error":
+        _(u"Undefined error (this sucks and should be fixed)"),
+}
+
+namespaces = {  # short prefix -> namespace URI
+    "html": "http://www.w3.org/1999/xhtml",
+    "mathml": "http://www.w3.org/1998/Math/MathML",
+    "svg": "http://www.w3.org/2000/svg",
+    "xlink": "http://www.w3.org/1999/xlink",
+    "xml": "http://www.w3.org/XML/1998/namespace",
+    "xmlns": "http://www.w3.org/2000/xmlns/",
+}
+
+scopingElements = frozenset((
+    (namespaces["html"], "applet"),
+    (namespaces["html"], "caption"),
+    (namespaces["html"], "html"),
+    (namespaces["html"], "marquee"),
+    (namespaces["html"], "object"),
+    (namespaces["html"], "table"),
+    (namespaces["html"], "td"),
+    (namespaces["html"], "th"),
+    (namespaces["mathml"], "mi"),
+    (namespaces["mathml"], "mo"),
+    (namespaces["mathml"], "mn"),
+    (namespaces["mathml"], "ms"),
+    (namespaces["mathml"], "mtext"),
+    (namespaces["mathml"], "annotation-xml"),
+    (namespaces["svg"], "foreignObject"),
+    (namespaces["svg"], "desc"),
+    (namespaces["svg"], "title"),
+))
+
+formattingElements = frozenset((
+    (namespaces["html"], "a"),
+    (namespaces["html"], "b"),
+    (namespaces["html"], "big"),
+    (namespaces["html"], "code"),
+    (namespaces["html"], "em"),
+    (namespaces["html"], "font"),
+    (namespaces["html"], "i"),
+    (namespaces["html"], "nobr"),
+    (namespaces["html"], "s"),
+    (namespaces["html"], "small"),
+    (namespaces["html"], "strike"),
+    (namespaces["html"], "strong"),
+    (namespaces["html"], "tt"),
+    (namespaces["html"], "u")
+))
+
+specialElements = frozenset((
+    (namespaces["html"], "address"),
+    (namespaces["html"], "applet"),
+    (namespaces["html"], "area"),
+    (namespaces["html"], "article"),
+    (namespaces["html"], "aside"),
+    (namespaces["html"], "base"),
+    (namespaces["html"], "basefont"),
+    (namespaces["html"], "bgsound"),
+    (namespaces["html"], "blockquote"),
+    (namespaces["html"], "body"),
+    (namespaces["html"], "br"),
+    (namespaces["html"], "button"),
+    (namespaces["html"], "caption"),
+    (namespaces["html"], "center"),
+    (namespaces["html"], "col"),
+    (namespaces["html"], "colgroup"),
+    (namespaces["html"], "command"),
+    (namespaces["html"], "dd"),
+    (namespaces["html"], "details"),
+    (namespaces["html"], "dir"),
+    (namespaces["html"], "div"),
+    (namespaces["html"], "dl"),
+    (namespaces["html"], "dt"),
+    (namespaces["html"], "embed"),
+    (namespaces["html"], "fieldset"),
+    (namespaces["html"], "figure"),
+    (namespaces["html"], "footer"),
+    (namespaces["html"], "form"),
+    (namespaces["html"], "frame"),
+    (namespaces["html"], "frameset"),
+    (namespaces["html"], "h1"),
+    (namespaces["html"], "h2"),
+    (namespaces["html"], "h3"),
+    (namespaces["html"], "h4"),
+    (namespaces["html"], "h5"),
+    (namespaces["html"], "h6"),
+    (namespaces["html"], "head"),
+    (namespaces["html"], "header"),
+    (namespaces["html"], "hr"),
+    (namespaces["html"], "html"),
+    (namespaces["html"], "iframe"),
+    # Note that image is commented out in the spec as "this isn't an
+    # element that can end up on the stack, so it doesn't matter,"
+    (namespaces["html"], "image"), 
+    (namespaces["html"], "img"),
+    (namespaces["html"], "input"),
+    (namespaces["html"], "isindex"),
+    (namespaces["html"], "li"),
+    (namespaces["html"], "link"),
+    (namespaces["html"], "listing"),
+    (namespaces["html"], "marquee"),
+    (namespaces["html"], "menu"),
+    (namespaces["html"], "meta"),
+    (namespaces["html"], "nav"),
+    (namespaces["html"], "noembed"),
+    (namespaces["html"], "noframes"),
+    (namespaces["html"], "noscript"),
+    (namespaces["html"], "object"),
+    (namespaces["html"], "ol"),
+    (namespaces["html"], "p"),
+    (namespaces["html"], "param"),
+    (namespaces["html"], "plaintext"),
+    (namespaces["html"], "pre"),
+    (namespaces["html"], "script"),
+    (namespaces["html"], "section"),
+    (namespaces["html"], "select"),
+    (namespaces["html"], "style"),
+    (namespaces["html"], "table"),
+    (namespaces["html"], "tbody"),
+    (namespaces["html"], "td"),
+    (namespaces["html"], "textarea"),
+    (namespaces["html"], "tfoot"),
+    (namespaces["html"], "th"),
+    (namespaces["html"], "thead"),
+    (namespaces["html"], "title"),
+    (namespaces["html"], "tr"),
+    (namespaces["html"], "ul"),
+    (namespaces["html"], "wbr"),
+    (namespaces["html"], "xmp"),
+    (namespaces["svg"], "foreignObject")
+))
+
+htmlIntegrationPointElements = frozenset((  # (namespace URI, tag name) pairs
+    (namespaces["mathml"], "annotation-xml"),  # spelling now matches the scopingElements entry
+    (namespaces["svg"], "foreignObject"),
+    (namespaces["svg"], "desc"),
+    (namespaces["svg"], "title")
+))
+
+mathmlTextIntegrationPointElements = frozenset((
+    (namespaces["mathml"], "mi"),
+    (namespaces["mathml"], "mo"),
+    (namespaces["mathml"], "mn"),
+    (namespaces["mathml"], "ms"),
+    (namespaces["mathml"], "mtext")
+))
+
+spaceCharacters = frozenset((
+    u"\t",
+    u"\n",
+    u"\u000C",
+    u" ",
+    u"\r"
+))
+
+tableInsertModeElements = frozenset((
+    "table",
+    "tbody",
+    "tfoot",
+    "thead",
+    "tr"
+))
+
+asciiLowercase = frozenset(string.ascii_lowercase)
+asciiUppercase = frozenset(string.ascii_uppercase)
+asciiLetters = frozenset(string.ascii_letters)
+digits = frozenset(string.digits)
+hexDigits = frozenset(string.hexdigits)
+
+asciiUpper2Lower = dict([(ord(upper), ord(upper.lower()))  # code point of A-Z -> code point of a-z
+    for upper in string.ascii_uppercase])
+
+# Heading elements need to be ordered
+headingElements = (
+    "h1",
+    "h2",
+    "h3",
+    "h4",
+    "h5",
+    "h6"
+)
+
+voidElements = frozenset((
+    "base",
+    "command",
+    "event-source",
+    "link",
+    "meta",
+    "hr",
+    "br",
+    "img",
+    "embed",
+    "param",
+    "area",
+    "col",
+    "input",
+    "source",
+    "track"
+))
+
+cdataElements = frozenset(["title", "textarea"])  # NOTE(review): the HTML spec calls these RCDATA; names look swapped with the set below - confirm before renaming
+
+rcdataElements = frozenset([
+    "style",
+    "script",
+    "xmp",
+    "iframe",
+    "noembed",
+    "noframes",
+    "noscript"
+])
+
+booleanAttributes = {  # tag name -> frozenset of attribute names
+    "": frozenset(("irrelevant",)),
+    "style": frozenset(("scoped",)),
+    "img": frozenset(("ismap",)),
+    "audio": frozenset(("autoplay","controls")),
+    "video": frozenset(("autoplay","controls")),
+    "script": frozenset(("defer", "async")),
+    "details": frozenset(("open",)),
+    "datagrid": frozenset(("multiple", "disabled")),
+    "command": frozenset(("hidden", "disabled", "checked", "default")),
+    "hr": frozenset(("noshade",)),  # trailing comma makes a 1-tuple; without it frozenset() splits the string into characters
+    "menu": frozenset(("autosubmit",)),
+    "fieldset": frozenset(("disabled", "readonly")),
+    "option": frozenset(("disabled", "readonly", "selected")),
+    "optgroup": frozenset(("disabled", "readonly")),
+    "button": frozenset(("disabled", "autofocus")),
+    "input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")),
+    "select": frozenset(("disabled", "readonly", "autofocus", "multiple")),
+    "output": frozenset(("disabled", "readonly")),
+}
+
+# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
+# therefore can't be a frozenset.
+entitiesWindows1252 = (
+    8364,  # 0x80  0x20AC  EURO SIGN
+    65533, # 0x81          UNDEFINED
+    8218,  # 0x82  0x201A  SINGLE LOW-9 QUOTATION MARK
+    402,   # 0x83  0x0192  LATIN SMALL LETTER F WITH HOOK
+    8222,  # 0x84  0x201E  DOUBLE LOW-9 QUOTATION MARK
+    8230,  # 0x85  0x2026  HORIZONTAL ELLIPSIS
+    8224,  # 0x86  0x2020  DAGGER
+    8225,  # 0x87  0x2021  DOUBLE DAGGER
+    710,   # 0x88  0x02C6  MODIFIER LETTER CIRCUMFLEX ACCENT
+    8240,  # 0x89  0x2030  PER MILLE SIGN
+    352,   # 0x8A  0x0160  LATIN CAPITAL LETTER S WITH CARON
+    8249,  # 0x8B  0x2039  SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    338,   # 0x8C  0x0152  LATIN CAPITAL LIGATURE OE
+    65533, # 0x8D          UNDEFINED
+    381,   # 0x8E  0x017D  LATIN CAPITAL LETTER Z WITH CARON
+    65533, # 0x8F          UNDEFINED
+    65533, # 0x90          UNDEFINED
+    8216,  # 0x91  0x2018  LEFT SINGLE QUOTATION MARK
+    8217,  # 0x92  0x2019  RIGHT SINGLE QUOTATION MARK
+    8220,  # 0x93  0x201C  LEFT DOUBLE QUOTATION MARK
+    8221,  # 0x94  0x201D  RIGHT DOUBLE QUOTATION MARK
+    8226,  # 0x95  0x2022  BULLET
+    8211,  # 0x96  0x2013  EN DASH
+    8212,  # 0x97  0x2014  EM DASH
+    732,   # 0x98  0x02DC  SMALL TILDE
+    8482,  # 0x99  0x2122  TRADE MARK SIGN
+    353,   # 0x9A  0x0161  LATIN SMALL LETTER S WITH CARON
+    8250,  # 0x9B  0x203A  SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    339,   # 0x9C  0x0153  LATIN SMALL LIGATURE OE
+    65533, # 0x9D          UNDEFINED
+    382,   # 0x9E  0x017E  LATIN SMALL LETTER Z WITH CARON
+    376    # 0x9F  0x0178  LATIN CAPITAL LETTER Y WITH DIAERESIS
+)
+
+xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
+
+entities = {
+    "AElig": u"\xc6",
+    "AElig;": u"\xc6",
+    "AMP": u"&",
+    "AMP;": u"&",
+    "Aacute": u"\xc1",
+    "Aacute;": u"\xc1",
+    "Abreve;": u"\u0102",
+    "Acirc": u"\xc2",
+    "Acirc;": u"\xc2",
+    "Acy;": u"\u0410",
+    "Afr;": u"\U0001d504",
+    "Agrave": u"\xc0",
+    "Agrave;": u"\xc0",
+    "Alpha;": u"\u0391",
+    "Amacr;": u"\u0100",
+    "And;": u"\u2a53",
+    "Aogon;": u"\u0104",
+    "Aopf;": u"\U0001d538",
+    "ApplyFunction;": u"\u2061",
+    "Aring": u"\xc5",
+    "Aring;": u"\xc5",
+    "Ascr;": u"\U0001d49c",
+    "Assign;": u"\u2254",
+    "Atilde": u"\xc3",
+    "Atilde;": u"\xc3",
+    "Auml": u"\xc4",
+    "Auml;": u"\xc4",
+    "Backslash;": u"\u2216",
+    "Barv;": u"\u2ae7",
+    "Barwed;": u"\u2306",
+    "Bcy;": u"\u0411",
+    "Because;": u"\u2235",
+    "Bernoullis;": u"\u212c",
+    "Beta;": u"\u0392",
+    "Bfr;": u"\U0001d505",
+    "Bopf;": u"\U0001d539",
+    "Breve;": u"\u02d8",
+    "Bscr;": u"\u212c",
+    "Bumpeq;": u"\u224e",
+    "CHcy;": u"\u0427",
+    "COPY": u"\xa9",
+    "COPY;": u"\xa9",
+    "Cacute;": u"\u0106",
+    "Cap;": u"\u22d2",
+    "CapitalDifferentialD;": u"\u2145",
+    "Cayleys;": u"\u212d",
+    "Ccaron;": u"\u010c",
+    "Ccedil": u"\xc7",
+    "Ccedil;": u"\xc7",
+    "Ccirc;": u"\u0108",
+    "Cconint;": u"\u2230",
+    "Cdot;": u"\u010a",
+    "Cedilla;": u"\xb8",
+    "CenterDot;": u"\xb7",
+    "Cfr;": u"\u212d",
+    "Chi;": u"\u03a7",
+    "CircleDot;": u"\u2299",
+    "CircleMinus;": u"\u2296",
+    "CirclePlus;": u"\u2295",
+    "CircleTimes;": u"\u2297",
+    "ClockwiseContourIntegral;": u"\u2232",
+    "CloseCurlyDoubleQuote;": u"\u201d",
+    "CloseCurlyQuote;": u"\u2019",
+    "Colon;": u"\u2237",
+    "Colone;": u"\u2a74",
+    "Congruent;": u"\u2261",
+    "Conint;": u"\u222f",
+    "ContourIntegral;": u"\u222e",
+    "Copf;": u"\u2102",
+    "Coproduct;": u"\u2210",
+    "CounterClockwiseContourIntegral;": u"\u2233",
+    "Cross;": u"\u2a2f",
+    "Cscr;": u"\U0001d49e",
+    "Cup;": u"\u22d3",
+    "CupCap;": u"\u224d",
+    "DD;": u"\u2145",
+    "DDotrahd;": u"\u2911",
+    "DJcy;": u"\u0402",
+    "DScy;": u"\u0405",
+    "DZcy;": u"\u040f",
+    "Dagger;": u"\u2021",
+    "Darr;": u"\u21a1",
+    "Dashv;": u"\u2ae4",
+    "Dcaron;": u"\u010e",
+    "Dcy;": u"\u0414",
+    "Del;": u"\u2207",
+    "Delta;": u"\u0394",
+    "Dfr;": u"\U0001d507",
+    "DiacriticalAcute;": u"\xb4",
+    "DiacriticalDot;": u"\u02d9",
+    "DiacriticalDoubleAcute;": u"\u02dd",
+    "DiacriticalGrave;": u"`",
+    "DiacriticalTilde;": u"\u02dc",
+    "Diamond;": u"\u22c4",
+    "DifferentialD;": u"\u2146",
+    "Dopf;": u"\U0001d53b",
+    "Dot;": u"\xa8",
+    "DotDot;": u"\u20dc",
+    "DotEqual;": u"\u2250",
+    "DoubleContourIntegral;": u"\u222f",
+    "DoubleDot;": u"\xa8",
+    "DoubleDownArrow;": u"\u21d3",
+    "DoubleLeftArrow;": u"\u21d0",
+    "DoubleLeftRightArrow;": u"\u21d4",
+    "DoubleLeftTee;": u"\u2ae4",
+    "DoubleLongLeftArrow;": u"\u27f8",
+    "DoubleLongLeftRightArrow;": u"\u27fa",
+    "DoubleLongRightArrow;": u"\u27f9",
+    "DoubleRightArrow;": u"\u21d2",
+    "DoubleRightTee;": u"\u22a8",
+    "DoubleUpArrow;": u"\u21d1",
+    "DoubleUpDownArrow;": u"\u21d5",
+    "DoubleVerticalBar;": u"\u2225",
+    "DownArrow;": u"\u2193",
+    "DownArrowBar;": u"\u2913",
+    "DownArrowUpArrow;": u"\u21f5",
+    "DownBreve;": u"\u0311",
+    "DownLeftRightVector;": u"\u2950",
+    "DownLeftTeeVector;": u"\u295e",
+    "DownLeftVector;": u"\u21bd",
+    "DownLeftVectorBar;": u"\u2956",
+    "DownRightTeeVector;": u"\u295f",
+    "DownRightVector;": u"\u21c1",
+    "DownRightVectorBar;": u"\u2957",
+    "DownTee;": u"\u22a4",
+    "DownTeeArrow;": u"\u21a7",
+    "Downarrow;": u"\u21d3",
+    "Dscr;": u"\U0001d49f",
+    "Dstrok;": u"\u0110",
+    "ENG;": u"\u014a",
+    "ETH": u"\xd0",
+    "ETH;": u"\xd0",
+    "Eacute": u"\xc9",
+    "Eacute;": u"\xc9",
+    "Ecaron;": u"\u011a",
+    "Ecirc": u"\xca",
+    "Ecirc;": u"\xca",
+    "Ecy;": u"\u042d",
+    "Edot;": u"\u0116",
+    "Efr;": u"\U0001d508",
+    "Egrave": u"\xc8",
+    "Egrave;": u"\xc8",
+    "Element;": u"\u2208",
+    "Emacr;": u"\u0112",
+    "EmptySmallSquare;": u"\u25fb",
+    "EmptyVerySmallSquare;": u"\u25ab",
+    "Eogon;": u"\u0118",
+    "Eopf;": u"\U0001d53c",
+    "Epsilon;": u"\u0395",
+    "Equal;": u"\u2a75",
+    "EqualTilde;": u"\u2242",
+    "Equilibrium;": u"\u21cc",
+    "Escr;": u"\u2130",
+    "Esim;": u"\u2a73",
+    "Eta;": u"\u0397",
+    "Euml": u"\xcb",
+    "Euml;": u"\xcb",
+    "Exists;": u"\u2203",
+    "ExponentialE;": u"\u2147",
+    "Fcy;": u"\u0424",
+    "Ffr;": u"\U0001d509",
+    "FilledSmallSquare;": u"\u25fc",
+    "FilledVerySmallSquare;": u"\u25aa",
+    "Fopf;": u"\U0001d53d",
+    "ForAll;": u"\u2200",
+    "Fouriertrf;": u"\u2131",
+    "Fscr;": u"\u2131",
+    "GJcy;": u"\u0403",
+    "GT": u">",
+    "GT;": u">",
+    "Gamma;": u"\u0393",
+    "Gammad;": u"\u03dc",
+    "Gbreve;": u"\u011e",
+    "Gcedil;": u"\u0122",
+    "Gcirc;": u"\u011c",
+    "Gcy;": u"\u0413",
+    "Gdot;": u"\u0120",
+    "Gfr;": u"\U0001d50a",
+    "Gg;": u"\u22d9",
+    "Gopf;": u"\U0001d53e",
+    "GreaterEqual;": u"\u2265",
+    "GreaterEqualLess;": u"\u22db",
+    "GreaterFullEqual;": u"\u2267",
+    "GreaterGreater;": u"\u2aa2",
+    "GreaterLess;": u"\u2277",
+    "GreaterSlantEqual;": u"\u2a7e",
+    "GreaterTilde;": u"\u2273",
+    "Gscr;": u"\U0001d4a2",
+    "Gt;": u"\u226b",
+    "HARDcy;": u"\u042a",
+    "Hacek;": u"\u02c7",
+    "Hat;": u"^",
+    "Hcirc;": u"\u0124",
+    "Hfr;": u"\u210c",
+    "HilbertSpace;": u"\u210b",
+    "Hopf;": u"\u210d",
+    "HorizontalLine;": u"\u2500",
+    "Hscr;": u"\u210b",
+    "Hstrok;": u"\u0126",
+    "HumpDownHump;": u"\u224e",
+    "HumpEqual;": u"\u224f",
+    "IEcy;": u"\u0415",
+    "IJlig;": u"\u0132",
+    "IOcy;": u"\u0401",
+    "Iacute": u"\xcd",
+    "Iacute;": u"\xcd",
+    "Icirc": u"\xce",
+    "Icirc;": u"\xce",
+    "Icy;": u"\u0418",
+    "Idot;": u"\u0130",
+    "Ifr;": u"\u2111",
+    "Igrave": u"\xcc",
+    "Igrave;": u"\xcc",
+    "Im;": u"\u2111",
+    "Imacr;": u"\u012a",
+    "ImaginaryI;": u"\u2148",
+    "Implies;": u"\u21d2",
+    "Int;": u"\u222c",
+    "Integral;": u"\u222b",
+    "Intersection;": u"\u22c2",
+    "InvisibleComma;": u"\u2063",
+    "InvisibleTimes;": u"\u2062",
+    "Iogon;": u"\u012e",
+    "Iopf;": u"\U0001d540",
+    "Iota;": u"\u0399",
+    "Iscr;": u"\u2110",
+    "Itilde;": u"\u0128",
+    "Iukcy;": u"\u0406",
+    "Iuml": u"\xcf",
+    "Iuml;": u"\xcf",
+    "Jcirc;": u"\u0134",
+    "Jcy;": u"\u0419",
+    "Jfr;": u"\U0001d50d",
+    "Jopf;": u"\U0001d541",
+    "Jscr;": u"\U0001d4a5",
+    "Jsercy;": u"\u0408",
+    "Jukcy;": u"\u0404",
+    "KHcy;": u"\u0425",
+    "KJcy;": u"\u040c",
+    "Kappa;": u"\u039a",
+    "Kcedil;": u"\u0136",
+    "Kcy;": u"\u041a",
+    "Kfr;": u"\U0001d50e",
+    "Kopf;": u"\U0001d542",
+    "Kscr;": u"\U0001d4a6",
+    "LJcy;": u"\u0409",
+    "LT": u"<",
+    "LT;": u"<",
+    "Lacute;": u"\u0139",
+    "Lambda;": u"\u039b",
+    "Lang;": u"\u27ea",
+    "Laplacetrf;": u"\u2112",
+    "Larr;": u"\u219e",
+    "Lcaron;": u"\u013d",
+    "Lcedil;": u"\u013b",
+    "Lcy;": u"\u041b",
+    "LeftAngleBracket;": u"\u27e8",
+    "LeftArrow;": u"\u2190",
+    "LeftArrowBar;": u"\u21e4",
+    "LeftArrowRightArrow;": u"\u21c6",
+    "LeftCeiling;": u"\u2308",
+    "LeftDoubleBracket;": u"\u27e6",
+    "LeftDownTeeVector;": u"\u2961",
+    "LeftDownVector;": u"\u21c3",
+    "LeftDownVectorBar;": u"\u2959",
+    "LeftFloor;": u"\u230a",
+    "LeftRightArrow;": u"\u2194",
+    "LeftRightVector;": u"\u294e",
+    "LeftTee;": u"\u22a3",
+    "LeftTeeArrow;": u"\u21a4",
+    "LeftTeeVector;": u"\u295a",
+    "LeftTriangle;": u"\u22b2",
+    "LeftTriangleBar;": u"\u29cf",
+    "LeftTriangleEqual;": u"\u22b4",
+    "LeftUpDownVector;": u"\u2951",
+    "LeftUpTeeVector;": u"\u2960",
+    "LeftUpVector;": u"\u21bf",
+    "LeftUpVectorBar;": u"\u2958",
+    "LeftVector;": u"\u21bc",
+    "LeftVectorBar;": u"\u2952",
+    "Leftarrow;": u"\u21d0",
+    "Leftrightarrow;": u"\u21d4",
+    "LessEqualGreater;": u"\u22da",
+    "LessFullEqual;": u"\u2266",
+    "LessGreater;": u"\u2276",
+    "LessLess;": u"\u2aa1",
+    "LessSlantEqual;": u"\u2a7d",
+    "LessTilde;": u"\u2272",
+    "Lfr;": u"\U0001d50f",
+    "Ll;": u"\u22d8",
+    "Lleftarrow;": u"\u21da",
+    "Lmidot;": u"\u013f",
+    "LongLeftArrow;": u"\u27f5",
+    "LongLeftRightArrow;": u"\u27f7",
+    "LongRightArrow;": u"\u27f6",
+    "Longleftarrow;": u"\u27f8",
+    "Longleftrightarrow;": u"\u27fa",
+    "Longrightarrow;": u"\u27f9",
+    "Lopf;": u"\U0001d543",
+    "LowerLeftArrow;": u"\u2199",
+    "LowerRightArrow;": u"\u2198",
+    "Lscr;": u"\u2112",
+    "Lsh;": u"\u21b0",
+    "Lstrok;": u"\u0141",
+    "Lt;": u"\u226a",
+    "Map;": u"\u2905",
+    "Mcy;": u"\u041c",
+    "MediumSpace;": u"\u205f",
+    "Mellintrf;": u"\u2133",
+    "Mfr;": u"\U0001d510",
+    "MinusPlus;": u"\u2213",
+    "Mopf;": u"\U0001d544",
+    "Mscr;": u"\u2133",
+    "Mu;": u"\u039c",
+    "NJcy;": u"\u040a",
+    "Nacute;": u"\u0143",
+    "Ncaron;": u"\u0147",
+    "Ncedil;": u"\u0145",
+    "Ncy;": u"\u041d",
+    "NegativeMediumSpace;": u"\u200b",
+    "NegativeThickSpace;": u"\u200b",
+    "NegativeThinSpace;": u"\u200b",
+    "NegativeVeryThinSpace;": u"\u200b",
+    "NestedGreaterGreater;": u"\u226b",
+    "NestedLessLess;": u"\u226a",
+    "NewLine;": u"\n",
+    "Nfr;": u"\U0001d511",
+    "NoBreak;": u"\u2060",
+    "NonBreakingSpace;": u"\xa0",
+    "Nopf;": u"\u2115",
+    "Not;": u"\u2aec",
+    "NotCongruent;": u"\u2262",
+    "NotCupCap;": u"\u226d",
+    "NotDoubleVerticalBar;": u"\u2226",
+    "NotElement;": u"\u2209",
+    "NotEqual;": u"\u2260",
+    "NotEqualTilde;": u"\u2242\u0338",
+    "NotExists;": u"\u2204",
+    "NotGreater;": u"\u226f",
+    "NotGreaterEqual;": u"\u2271",
+    "NotGreaterFullEqual;": u"\u2267\u0338",
+    "NotGreaterGreater;": u"\u226b\u0338",
+    "NotGreaterLess;": u"\u2279",
+    "NotGreaterSlantEqual;": u"\u2a7e\u0338",
+    "NotGreaterTilde;": u"\u2275",
+    "NotHumpDownHump;": u"\u224e\u0338",
+    "NotHumpEqual;": u"\u224f\u0338",
+    "NotLeftTriangle;": u"\u22ea",
+    "NotLeftTriangleBar;": u"\u29cf\u0338",
+    "NotLeftTriangleEqual;": u"\u22ec",
+    "NotLess;": u"\u226e",
+    "NotLessEqual;": u"\u2270",
+    "NotLessGreater;": u"\u2278",
+    "NotLessLess;": u"\u226a\u0338",
+    "NotLessSlantEqual;": u"\u2a7d\u0338",
+    "NotLessTilde;": u"\u2274",
+    "NotNestedGreaterGreater;": u"\u2aa2\u0338",
+    "NotNestedLessLess;": u"\u2aa1\u0338",
+    "NotPrecedes;": u"\u2280",
+    "NotPrecedesEqual;": u"\u2aaf\u0338",
+    "NotPrecedesSlantEqual;": u"\u22e0",
+    "NotReverseElement;": u"\u220c",
+    "NotRightTriangle;": u"\u22eb",
+    "NotRightTriangleBar;": u"\u29d0\u0338",
+    "NotRightTriangleEqual;": u"\u22ed",
+    "NotSquareSubset;": u"\u228f\u0338",
+    "NotSquareSubsetEqual;": u"\u22e2",
+    "NotSquareSuperset;": u"\u2290\u0338",
+    "NotSquareSupersetEqual;": u"\u22e3",
+    "NotSubset;": u"\u2282\u20d2",
+    "NotSubsetEqual;": u"\u2288",
+    "NotSucceeds;": u"\u2281",
+    "NotSucceedsEqual;": u"\u2ab0\u0338",
+    "NotSucceedsSlantEqual;": u"\u22e1",
+    "NotSucceedsTilde;": u"\u227f\u0338",
+    "NotSuperset;": u"\u2283\u20d2",
+    "NotSupersetEqual;": u"\u2289",
+    "NotTilde;": u"\u2241",
+    "NotTildeEqual;": u"\u2244",
+    "NotTildeFullEqual;": u"\u2247",
+    "NotTildeTilde;": u"\u2249",
+    "NotVerticalBar;": u"\u2224",
+    "Nscr;": u"\U0001d4a9",
+    "Ntilde": u"\xd1",
+    "Ntilde;": u"\xd1",
+    "Nu;": u"\u039d",
+    "OElig;": u"\u0152",
+    "Oacute": u"\xd3",
+    "Oacute;": u"\xd3",
+    "Ocirc": u"\xd4",
+    "Ocirc;": u"\xd4",
+    "Ocy;": u"\u041e",
+    "Odblac;": u"\u0150",
+    "Ofr;": u"\U0001d512",
+    "Ograve": u"\xd2",
+    "Ograve;": u"\xd2",
+    "Omacr;": u"\u014c",
+    "Omega;": u"\u03a9",
+    "Omicron;": u"\u039f",
+    "Oopf;": u"\U0001d546",
+    "OpenCurlyDoubleQuote;": u"\u201c",
+    "OpenCurlyQuote;": u"\u2018",
+    "Or;": u"\u2a54",
+    "Oscr;": u"\U0001d4aa",
+    "Oslash": u"\xd8",
+    "Oslash;": u"\xd8",
+    "Otilde": u"\xd5",
+    "Otilde;": u"\xd5",
+    "Otimes;": u"\u2a37",
+    "Ouml": u"\xd6",
+    "Ouml;": u"\xd6",
+    "OverBar;": u"\u203e",
+    "OverBrace;": u"\u23de",
+    "OverBracket;": u"\u23b4",
+    "OverParenthesis;": u"\u23dc",
+    "PartialD;": u"\u2202",
+    "Pcy;": u"\u041f",
+    "Pfr;": u"\U0001d513",
+    "Phi;": u"\u03a6",
+    "Pi;": u"\u03a0",
+    "PlusMinus;": u"\xb1",
+    "Poincareplane;": u"\u210c",
+    "Popf;": u"\u2119",
+    "Pr;": u"\u2abb",
+    "Precedes;": u"\u227a",
+    "PrecedesEqual;": u"\u2aaf",
+    "PrecedesSlantEqual;": u"\u227c",
+    "PrecedesTilde;": u"\u227e",
+    "Prime;": u"\u2033",
+    "Product;": u"\u220f",
+    "Proportion;": u"\u2237",
+    "Proportional;": u"\u221d",
+    "Pscr;": u"\U0001d4ab",
+    "Psi;": u"\u03a8",
+    "QUOT": u"\"",
+    "QUOT;": u"\"",
+    "Qfr;": u"\U0001d514",
+    "Qopf;": u"\u211a",
+    "Qscr;": u"\U0001d4ac",
+    "RBarr;": u"\u2910",
+    "REG": u"\xae",
+    "REG;": u"\xae",
+    "Racute;": u"\u0154",
+    "Rang;": u"\u27eb",
+    "Rarr;": u"\u21a0",
+    "Rarrtl;": u"\u2916",
+    "Rcaron;": u"\u0158",
+    "Rcedil;": u"\u0156",
+    "Rcy;": u"\u0420",
+    "Re;": u"\u211c",
+    "ReverseElement;": u"\u220b",
+    "ReverseEquilibrium;": u"\u21cb",
+    "ReverseUpEquilibrium;": u"\u296f",
+    "Rfr;": u"\u211c",
+    "Rho;": u"\u03a1",
+    "RightAngleBracket;": u"\u27e9",
+    "RightArrow;": u"\u2192",
+    "RightArrowBar;": u"\u21e5",
+    "RightArrowLeftArrow;": u"\u21c4",
+    "RightCeiling;": u"\u2309",
+    "RightDoubleBracket;": u"\u27e7",
+    "RightDownTeeVector;": u"\u295d",
+    "RightDownVector;": u"\u21c2",
+    "RightDownVectorBar;": u"\u2955",
+    "RightFloor;": u"\u230b",
+    "RightTee;": u"\u22a2",
+    "RightTeeArrow;": u"\u21a6",
+    "RightTeeVector;": u"\u295b",
+    "RightTriangle;": u"\u22b3",
+    "RightTriangleBar;": u"\u29d0",
+    "RightTriangleEqual;": u"\u22b5",
+    "RightUpDownVector;": u"\u294f",
+    "RightUpTeeVector;": u"\u295c",
+    "RightUpVector;": u"\u21be",
+    "RightUpVectorBar;": u"\u2954",
+    "RightVector;": u"\u21c0",
+    "RightVectorBar;": u"\u2953",
+    "Rightarrow;": u"\u21d2",
+    "Ropf;": u"\u211d",
+    "RoundImplies;": u"\u2970",
+    "Rrightarrow;": u"\u21db",
+    "Rscr;": u"\u211b",
+    "Rsh;": u"\u21b1",
+    "RuleDelayed;": u"\u29f4",
+    "SHCHcy;": u"\u0429",
+    "SHcy;": u"\u0428",
+    "SOFTcy;": u"\u042c",
+    "Sacute;": u"\u015a",
+    "Sc;": u"\u2abc",
+    "Scaron;": u"\u0160",
+    "Scedil;": u"\u015e",
+    "Scirc;": u"\u015c",
+    "Scy;": u"\u0421",
+    "Sfr;": u"\U0001d516",
+    "ShortDownArrow;": u"\u2193",
+    "ShortLeftArrow;": u"\u2190",
+    "ShortRightArrow;": u"\u2192",
+    "ShortUpArrow;": u"\u2191",
+    "Sigma;": u"\u03a3",
+    "SmallCircle;": u"\u2218",
+    "Sopf;": u"\U0001d54a",
+    "Sqrt;": u"\u221a",
+    "Square;": u"\u25a1",
+    "SquareIntersection;": u"\u2293",
+    "SquareSubset;": u"\u228f",
+    "SquareSubsetEqual;": u"\u2291",
+    "SquareSuperset;": u"\u2290",
+    "SquareSupersetEqual;": u"\u2292",
+    "SquareUnion;": u"\u2294",
+    "Sscr;": u"\U0001d4ae",
+    "Star;": u"\u22c6",
+    "Sub;": u"\u22d0",
+    "Subset;": u"\u22d0",
+    "SubsetEqual;": u"\u2286",
+    "Succeeds;": u"\u227b",
+    "SucceedsEqual;": u"\u2ab0",
+    "SucceedsSlantEqual;": u"\u227d",
+    "SucceedsTilde;": u"\u227f",
+    "SuchThat;": u"\u220b",
+    "Sum;": u"\u2211",
+    "Sup;": u"\u22d1",
+    "Superset;": u"\u2283",
+    "SupersetEqual;": u"\u2287",
+    "Supset;": u"\u22d1",
+    "THORN": u"\xde",
+    "THORN;": u"\xde",
+    "TRADE;": u"\u2122",
+    "TSHcy;": u"\u040b",
+    "TScy;": u"\u0426",
+    "Tab;": u"\t",
+    "Tau;": u"\u03a4",
+    "Tcaron;": u"\u0164",
+    "Tcedil;": u"\u0162",
+    "Tcy;": u"\u0422",
+    "Tfr;": u"\U0001d517",
+    "Therefore;": u"\u2234",
+    "Theta;": u"\u0398",
+    "ThickSpace;": u"\u205f\u200a",
+    "ThinSpace;": u"\u2009",
+    "Tilde;": u"\u223c",
+    "TildeEqual;": u"\u2243",
+    "TildeFullEqual;": u"\u2245",
+    "TildeTilde;": u"\u2248",
+    "Topf;": u"\U0001d54b",
+    "TripleDot;": u"\u20db",
+    "Tscr;": u"\U0001d4af",
+    "Tstrok;": u"\u0166",
+    "Uacute": u"\xda",
+    "Uacute;": u"\xda",
+    "Uarr;": u"\u219f",
+    "Uarrocir;": u"\u2949",
+    "Ubrcy;": u"\u040e",
+    "Ubreve;": u"\u016c",
+    "Ucirc": u"\xdb",
+    "Ucirc;": u"\xdb",
+    "Ucy;": u"\u0423",
+    "Udblac;": u"\u0170",
+    "Ufr;": u"\U0001d518",
+    "Ugrave": u"\xd9",
+    "Ugrave;": u"\xd9",
+    "Umacr;": u"\u016a",
+    "UnderBar;": u"_",
+    "UnderBrace;": u"\u23df",
+    "UnderBracket;": u"\u23b5",
+    "UnderParenthesis;": u"\u23dd",
+    "Union;": u"\u22c3",
+    "UnionPlus;": u"\u228e",
+    "Uogon;": u"\u0172",
+    "Uopf;": u"\U0001d54c",
+    "UpArrow;": u"\u2191",
+    "UpArrowBar;": u"\u2912",
+    "UpArrowDownArrow;": u"\u21c5",
+    "UpDownArrow;": u"\u2195",
+    "UpEquilibrium;": u"\u296e",
+    "UpTee;": u"\u22a5",
+    "UpTeeArrow;": u"\u21a5",
+    "Uparrow;": u"\u21d1",
+    "Updownarrow;": u"\u21d5",
+    "UpperLeftArrow;": u"\u2196",
+    "UpperRightArrow;": u"\u2197",
+    "Upsi;": u"\u03d2",
+    "Upsilon;": u"\u03a5",
+    "Uring;": u"\u016e",
+    "Uscr;": u"\U0001d4b0",
+    "Utilde;": u"\u0168",
+    "Uuml": u"\xdc",
+    "Uuml;": u"\xdc",
+    "VDash;": u"\u22ab",
+    "Vbar;": u"\u2aeb",
+    "Vcy;": u"\u0412",
+    "Vdash;": u"\u22a9",
+    "Vdashl;": u"\u2ae6",
+    "Vee;": u"\u22c1",
+    "Verbar;": u"\u2016",
+    "Vert;": u"\u2016",
+    "VerticalBar;": u"\u2223",
+    "VerticalLine;": u"|",
+    "VerticalSeparator;": u"\u2758",
+    "VerticalTilde;": u"\u2240",
+    "VeryThinSpace;": u"\u200a",
+    "Vfr;": u"\U0001d519",
+    "Vopf;": u"\U0001d54d",
+    "Vscr;": u"\U0001d4b1",
+    "Vvdash;": u"\u22aa",
+    "Wcirc;": u"\u0174",
+    "Wedge;": u"\u22c0",
+    "Wfr;": u"\U0001d51a",
+    "Wopf;": u"\U0001d54e",
+    "Wscr;": u"\U0001d4b2",
+    "Xfr;": u"\U0001d51b",
+    "Xi;": u"\u039e",
+    "Xopf;": u"\U0001d54f",
+    "Xscr;": u"\U0001d4b3",
+    "YAcy;": u"\u042f",
+    "YIcy;": u"\u0407",
+    "YUcy;": u"\u042e",
+    "Yacute": u"\xdd",
+    "Yacute;": u"\xdd",
+    "Ycirc;": u"\u0176",
+    "Ycy;": u"\u042b",
+    "Yfr;": u"\U0001d51c",
+    "Yopf;": u"\U0001d550",
+    "Yscr;": u"\U0001d4b4",
+    "Yuml;": u"\u0178",
+    "ZHcy;": u"\u0416",
+    "Zacute;": u"\u0179",
+    "Zcaron;": u"\u017d",
+    "Zcy;": u"\u0417",
+    "Zdot;": u"\u017b",
+    "ZeroWidthSpace;": u"\u200b",
+    "Zeta;": u"\u0396",
+    "Zfr;": u"\u2128",
+    "Zopf;": u"\u2124",
+    "Zscr;": u"\U0001d4b5",
+    "aacute": u"\xe1",
+    "aacute;": u"\xe1",
+    "abreve;": u"\u0103",
+    "ac;": u"\u223e",
+    "acE;": u"\u223e\u0333",
+    "acd;": u"\u223f",
+    "acirc": u"\xe2",
+    "acirc;": u"\xe2",
+    "acute": u"\xb4",
+    "acute;": u"\xb4",
+    "acy;": u"\u0430",
+    "aelig": u"\xe6",
+    "aelig;": u"\xe6",
+    "af;": u"\u2061",
+    "afr;": u"\U0001d51e",
+    "agrave": u"\xe0",
+    "agrave;": u"\xe0",
+    "alefsym;": u"\u2135",
+    "aleph;": u"\u2135",
+    "alpha;": u"\u03b1",
+    "amacr;": u"\u0101",
+    "amalg;": u"\u2a3f",
+    "amp": u"&",
+    "amp;": u"&",
+    "and;": u"\u2227",
+    "andand;": u"\u2a55",
+    "andd;": u"\u2a5c",
+    "andslope;": u"\u2a58",
+    "andv;": u"\u2a5a",
+    "ang;": u"\u2220",
+    "ange;": u"\u29a4",
+    "angle;": u"\u2220",
+    "angmsd;": u"\u2221",
+    "angmsdaa;": u"\u29a8",
+    "angmsdab;": u"\u29a9",
+    "angmsdac;": u"\u29aa",
+    "angmsdad;": u"\u29ab",
+    "angmsdae;": u"\u29ac",
+    "angmsdaf;": u"\u29ad",
+    "angmsdag;": u"\u29ae",
+    "angmsdah;": u"\u29af",
+    "angrt;": u"\u221f",
+    "angrtvb;": u"\u22be",
+    "angrtvbd;": u"\u299d",
+    "angsph;": u"\u2222",
+    "angst;": u"\xc5",
+    "angzarr;": u"\u237c",
+    "aogon;": u"\u0105",
+    "aopf;": u"\U0001d552",
+    "ap;": u"\u2248",
+    "apE;": u"\u2a70",
+    "apacir;": u"\u2a6f",
+    "ape;": u"\u224a",
+    "apid;": u"\u224b",
+    "apos;": u"'",
+    "approx;": u"\u2248",
+    "approxeq;": u"\u224a",
+    "aring": u"\xe5",
+    "aring;": u"\xe5",
+    "ascr;": u"\U0001d4b6",
+    "ast;": u"*",
+    "asymp;": u"\u2248",
+    "asympeq;": u"\u224d",
+    "atilde": u"\xe3",
+    "atilde;": u"\xe3",
+    "auml": u"\xe4",
+    "auml;": u"\xe4",
+    "awconint;": u"\u2233",
+    "awint;": u"\u2a11",
+    "bNot;": u"\u2aed",
+    "backcong;": u"\u224c",
+    "backepsilon;": u"\u03f6",
+    "backprime;": u"\u2035",
+    "backsim;": u"\u223d",
+    "backsimeq;": u"\u22cd",
+    "barvee;": u"\u22bd",
+    "barwed;": u"\u2305",
+    "barwedge;": u"\u2305",
+    "bbrk;": u"\u23b5",
+    "bbrktbrk;": u"\u23b6",
+    "bcong;": u"\u224c",
+    "bcy;": u"\u0431",
+    "bdquo;": u"\u201e",
+    "becaus;": u"\u2235",
+    "because;": u"\u2235",
+    "bemptyv;": u"\u29b0",
+    "bepsi;": u"\u03f6",
+    "bernou;": u"\u212c",
+    "beta;": u"\u03b2",
+    "beth;": u"\u2136",
+    "between;": u"\u226c",
+    "bfr;": u"\U0001d51f",
+    "bigcap;": u"\u22c2",
+    "bigcirc;": u"\u25ef",
+    "bigcup;": u"\u22c3",
+    "bigodot;": u"\u2a00",
+    "bigoplus;": u"\u2a01",
+    "bigotimes;": u"\u2a02",
+    "bigsqcup;": u"\u2a06",
+    "bigstar;": u"\u2605",
+    "bigtriangledown;": u"\u25bd",
+    "bigtriangleup;": u"\u25b3",
+    "biguplus;": u"\u2a04",
+    "bigvee;": u"\u22c1",
+    "bigwedge;": u"\u22c0",
+    "bkarow;": u"\u290d",
+    "blacklozenge;": u"\u29eb",
+    "blacksquare;": u"\u25aa",
+    "blacktriangle;": u"\u25b4",
+    "blacktriangledown;": u"\u25be",
+    "blacktriangleleft;": u"\u25c2",
+    "blacktriangleright;": u"\u25b8",
+    "blank;": u"\u2423",
+    "blk12;": u"\u2592",
+    "blk14;": u"\u2591",
+    "blk34;": u"\u2593",
+    "block;": u"\u2588",
+    "bne;": u"=\u20e5",
+    "bnequiv;": u"\u2261\u20e5",
+    "bnot;": u"\u2310",
+    "bopf;": u"\U0001d553",
+    "bot;": u"\u22a5",
+    "bottom;": u"\u22a5",
+    "bowtie;": u"\u22c8",
+    "boxDL;": u"\u2557",
+    "boxDR;": u"\u2554",
+    "boxDl;": u"\u2556",
+    "boxDr;": u"\u2553",
+    "boxH;": u"\u2550",
+    "boxHD;": u"\u2566",
+    "boxHU;": u"\u2569",
+    "boxHd;": u"\u2564",
+    "boxHu;": u"\u2567",
+    "boxUL;": u"\u255d",
+    "boxUR;": u"\u255a",
+    "boxUl;": u"\u255c",
+    "boxUr;": u"\u2559",
+    "boxV;": u"\u2551",
+    "boxVH;": u"\u256c",
+    "boxVL;": u"\u2563",
+    "boxVR;": u"\u2560",
+    "boxVh;": u"\u256b",
+    "boxVl;": u"\u2562",
+    "boxVr;": u"\u255f",
+    "boxbox;": u"\u29c9",
+    "boxdL;": u"\u2555",
+    "boxdR;": u"\u2552",
+    "boxdl;": u"\u2510",
+    "boxdr;": u"\u250c",
+    "boxh;": u"\u2500",
+    "boxhD;": u"\u2565",
+    "boxhU;": u"\u2568",
+    "boxhd;": u"\u252c",
+    "boxhu;": u"\u2534",
+    "boxminus;": u"\u229f",
+    "boxplus;": u"\u229e",
+    "boxtimes;": u"\u22a0",
+    "boxuL;": u"\u255b",
+    "boxuR;": u"\u2558",
+    "boxul;": u"\u2518",
+    "boxur;": u"\u2514",
+    "boxv;": u"\u2502",
+    "boxvH;": u"\u256a",
+    "boxvL;": u"\u2561",
+    "boxvR;": u"\u255e",
+    "boxvh;": u"\u253c",
+    "boxvl;": u"\u2524",
+    "boxvr;": u"\u251c",
+    "bprime;": u"\u2035",
+    "breve;": u"\u02d8",
+    "brvbar": u"\xa6",
+    "brvbar;": u"\xa6",
+    "bscr;": u"\U0001d4b7",
+    "bsemi;": u"\u204f",
+    "bsim;": u"\u223d",
+    "bsime;": u"\u22cd",
+    "bsol;": u"\\",
+    "bsolb;": u"\u29c5",
+    "bsolhsub;": u"\u27c8",
+    "bull;": u"\u2022",
+    "bullet;": u"\u2022",
+    "bump;": u"\u224e",
+    "bumpE;": u"\u2aae",
+    "bumpe;": u"\u224f",
+    "bumpeq;": u"\u224f",
+    "cacute;": u"\u0107",
+    "cap;": u"\u2229",
+    "capand;": u"\u2a44",
+    "capbrcup;": u"\u2a49",
+    "capcap;": u"\u2a4b",
+    "capcup;": u"\u2a47",
+    "capdot;": u"\u2a40",
+    "caps;": u"\u2229\ufe00",
+    "caret;": u"\u2041",
+    "caron;": u"\u02c7",
+    "ccaps;": u"\u2a4d",
+    "ccaron;": u"\u010d",
+    "ccedil": u"\xe7",
+    "ccedil;": u"\xe7",
+    "ccirc;": u"\u0109",
+    "ccups;": u"\u2a4c",
+    "ccupssm;": u"\u2a50",
+    "cdot;": u"\u010b",
+    "cedil": u"\xb8",
+    "cedil;": u"\xb8",
+    "cemptyv;": u"\u29b2",
+    "cent": u"\xa2",
+    "cent;": u"\xa2",
+    "centerdot;": u"\xb7",
+    "cfr;": u"\U0001d520",
+    "chcy;": u"\u0447",
+    "check;": u"\u2713",
+    "checkmark;": u"\u2713",
+    "chi;": u"\u03c7",
+    "cir;": u"\u25cb",
+    "cirE;": u"\u29c3",
+    "circ;": u"\u02c6",
+    "circeq;": u"\u2257",
+    "circlearrowleft;": u"\u21ba",
+    "circlearrowright;": u"\u21bb",
+    "circledR;": u"\xae",
+    "circledS;": u"\u24c8",
+    "circledast;": u"\u229b",
+    "circledcirc;": u"\u229a",
+    "circleddash;": u"\u229d",
+    "cire;": u"\u2257",
+    "cirfnint;": u"\u2a10",
+    "cirmid;": u"\u2aef",
+    "cirscir;": u"\u29c2",
+    "clubs;": u"\u2663",
+    "clubsuit;": u"\u2663",
+    "colon;": u":",
+    "colone;": u"\u2254",
+    "coloneq;": u"\u2254",
+    "comma;": u",",
+    "commat;": u"@",
+    "comp;": u"\u2201",
+    "compfn;": u"\u2218",
+    "complement;": u"\u2201",
+    "complexes;": u"\u2102",
+    "cong;": u"\u2245",
+    "congdot;": u"\u2a6d",
+    "conint;": u"\u222e",
+    "copf;": u"\U0001d554",
+    "coprod;": u"\u2210",
+    "copy": u"\xa9",
+    "copy;": u"\xa9",
+    "copysr;": u"\u2117",
+    "crarr;": u"\u21b5",
+    "cross;": u"\u2717",
+    "cscr;": u"\U0001d4b8",
+    "csub;": u"\u2acf",
+    "csube;": u"\u2ad1",
+    "csup;": u"\u2ad0",
+    "csupe;": u"\u2ad2",
+    "ctdot;": u"\u22ef",
+    "cudarrl;": u"\u2938",
+    "cudarrr;": u"\u2935",
+    "cuepr;": u"\u22de",
+    "cuesc;": u"\u22df",
+    "cularr;": u"\u21b6",
+    "cularrp;": u"\u293d",
+    "cup;": u"\u222a",
+    "cupbrcap;": u"\u2a48",
+    "cupcap;": u"\u2a46",
+    "cupcup;": u"\u2a4a",
+    "cupdot;": u"\u228d",
+    "cupor;": u"\u2a45",
+    "cups;": u"\u222a\ufe00",
+    "curarr;": u"\u21b7",
+    "curarrm;": u"\u293c",
+    "curlyeqprec;": u"\u22de",
+    "curlyeqsucc;": u"\u22df",
+    "curlyvee;": u"\u22ce",
+    "curlywedge;": u"\u22cf",
+    "curren": u"\xa4",
+    "curren;": u"\xa4",
+    "curvearrowleft;": u"\u21b6",
+    "curvearrowright;": u"\u21b7",
+    "cuvee;": u"\u22ce",
+    "cuwed;": u"\u22cf",
+    "cwconint;": u"\u2232",
+    "cwint;": u"\u2231",
+    "cylcty;": u"\u232d",
+    "dArr;": u"\u21d3",
+    "dHar;": u"\u2965",
+    "dagger;": u"\u2020",
+    "daleth;": u"\u2138",
+    "darr;": u"\u2193",
+    "dash;": u"\u2010",
+    "dashv;": u"\u22a3",
+    "dbkarow;": u"\u290f",
+    "dblac;": u"\u02dd",
+    "dcaron;": u"\u010f",
+    "dcy;": u"\u0434",
+    "dd;": u"\u2146",
+    "ddagger;": u"\u2021",
+    "ddarr;": u"\u21ca",
+    "ddotseq;": u"\u2a77",
+    "deg": u"\xb0",
+    "deg;": u"\xb0",
+    "delta;": u"\u03b4",
+    "demptyv;": u"\u29b1",
+    "dfisht;": u"\u297f",
+    "dfr;": u"\U0001d521",
+    "dharl;": u"\u21c3",
+    "dharr;": u"\u21c2",
+    "diam;": u"\u22c4",
+    "diamond;": u"\u22c4",
+    "diamondsuit;": u"\u2666",
+    "diams;": u"\u2666",
+    "die;": u"\xa8",
+    "digamma;": u"\u03dd",
+    "disin;": u"\u22f2",
+    "div;": u"\xf7",
+    "divide": u"\xf7",
+    "divide;": u"\xf7",
+    "divideontimes;": u"\u22c7",
+    "divonx;": u"\u22c7",
+    "djcy;": u"\u0452",
+    "dlcorn;": u"\u231e",
+    "dlcrop;": u"\u230d",
+    "dollar;": u"$",
+    "dopf;": u"\U0001d555",
+    "dot;": u"\u02d9",
+    "doteq;": u"\u2250",
+    "doteqdot;": u"\u2251",
+    "dotminus;": u"\u2238",
+    "dotplus;": u"\u2214",
+    "dotsquare;": u"\u22a1",
+    "doublebarwedge;": u"\u2306",
+    "downarrow;": u"\u2193",
+    "downdownarrows;": u"\u21ca",
+    "downharpoonleft;": u"\u21c3",
+    "downharpoonright;": u"\u21c2",
+    "drbkarow;": u"\u2910",
+    "drcorn;": u"\u231f",
+    "drcrop;": u"\u230c",
+    "dscr;": u"\U0001d4b9",
+    "dscy;": u"\u0455",
+    "dsol;": u"\u29f6",
+    "dstrok;": u"\u0111",
+    "dtdot;": u"\u22f1",
+    "dtri;": u"\u25bf",
+    "dtrif;": u"\u25be",
+    "duarr;": u"\u21f5",
+    "duhar;": u"\u296f",
+    "dwangle;": u"\u29a6",
+    "dzcy;": u"\u045f",
+    "dzigrarr;": u"\u27ff",
+    "eDDot;": u"\u2a77",
+    "eDot;": u"\u2251",
+    "eacute": u"\xe9",
+    "eacute;": u"\xe9",
+    "easter;": u"\u2a6e",
+    "ecaron;": u"\u011b",
+    "ecir;": u"\u2256",
+    "ecirc": u"\xea",
+    "ecirc;": u"\xea",
+    "ecolon;": u"\u2255",
+    "ecy;": u"\u044d",
+    "edot;": u"\u0117",
+    "ee;": u"\u2147",
+    "efDot;": u"\u2252",
+    "efr;": u"\U0001d522",
+    "eg;": u"\u2a9a",
+    "egrave": u"\xe8",
+    "egrave;": u"\xe8",
+    "egs;": u"\u2a96",
+    "egsdot;": u"\u2a98",
+    "el;": u"\u2a99",
+    "elinters;": u"\u23e7",
+    "ell;": u"\u2113",
+    "els;": u"\u2a95",
+    "elsdot;": u"\u2a97",
+    "emacr;": u"\u0113",
+    "empty;": u"\u2205",
+    "emptyset;": u"\u2205",
+    "emptyv;": u"\u2205",
+    "emsp13;": u"\u2004",
+    "emsp14;": u"\u2005",
+    "emsp;": u"\u2003",
+    "eng;": u"\u014b",
+    "ensp;": u"\u2002",
+    "eogon;": u"\u0119",
+    "eopf;": u"\U0001d556",
+    "epar;": u"\u22d5",
+    "eparsl;": u"\u29e3",
+    "eplus;": u"\u2a71",
+    "epsi;": u"\u03b5",
+    "epsilon;": u"\u03b5",
+    "epsiv;": u"\u03f5",
+    "eqcirc;": u"\u2256",
+    "eqcolon;": u"\u2255",
+    "eqsim;": u"\u2242",
+    "eqslantgtr;": u"\u2a96",
+    "eqslantless;": u"\u2a95",
+    "equals;": u"=",
+    "equest;": u"\u225f",
+    "equiv;": u"\u2261",
+    "equivDD;": u"\u2a78",
+    "eqvparsl;": u"\u29e5",
+    "erDot;": u"\u2253",
+    "erarr;": u"\u2971",
+    "escr;": u"\u212f",
+    "esdot;": u"\u2250",
+    "esim;": u"\u2242",
+    "eta;": u"\u03b7",
+    "eth": u"\xf0",
+    "eth;": u"\xf0",
+    "euml": u"\xeb",
+    "euml;": u"\xeb",
+    "euro;": u"\u20ac",
+    "excl;": u"!",
+    "exist;": u"\u2203",
+    "expectation;": u"\u2130",
+    "exponentiale;": u"\u2147",
+    "fallingdotseq;": u"\u2252",
+    "fcy;": u"\u0444",
+    "female;": u"\u2640",
+    "ffilig;": u"\ufb03",
+    "fflig;": u"\ufb00",
+    "ffllig;": u"\ufb04",
+    "ffr;": u"\U0001d523",
+    "filig;": u"\ufb01",
+    "fjlig;": u"fj",
+    "flat;": u"\u266d",
+    "fllig;": u"\ufb02",
+    "fltns;": u"\u25b1",
+    "fnof;": u"\u0192",
+    "fopf;": u"\U0001d557",
+    "forall;": u"\u2200",
+    "fork;": u"\u22d4",
+    "forkv;": u"\u2ad9",
+    "fpartint;": u"\u2a0d",
+    "frac12": u"\xbd",
+    "frac12;": u"\xbd",
+    "frac13;": u"\u2153",
+    "frac14": u"\xbc",
+    "frac14;": u"\xbc",
+    "frac15;": u"\u2155",
+    "frac16;": u"\u2159",
+    "frac18;": u"\u215b",
+    "frac23;": u"\u2154",
+    "frac25;": u"\u2156",
+    "frac34": u"\xbe",
+    "frac34;": u"\xbe",
+    "frac35;": u"\u2157",
+    "frac38;": u"\u215c",
+    "frac45;": u"\u2158",
+    "frac56;": u"\u215a",
+    "frac58;": u"\u215d",
+    "frac78;": u"\u215e",
+    "frasl;": u"\u2044",
+    "frown;": u"\u2322",
+    "fscr;": u"\U0001d4bb",
+    "gE;": u"\u2267",
+    "gEl;": u"\u2a8c",
+    "gacute;": u"\u01f5",
+    "gamma;": u"\u03b3",
+    "gammad;": u"\u03dd",
+    "gap;": u"\u2a86",
+    "gbreve;": u"\u011f",
+    "gcirc;": u"\u011d",
+    "gcy;": u"\u0433",
+    "gdot;": u"\u0121",
+    "ge;": u"\u2265",
+    "gel;": u"\u22db",
+    "geq;": u"\u2265",
+    "geqq;": u"\u2267",
+    "geqslant;": u"\u2a7e",
+    "ges;": u"\u2a7e",
+    "gescc;": u"\u2aa9",
+    "gesdot;": u"\u2a80",
+    "gesdoto;": u"\u2a82",
+    "gesdotol;": u"\u2a84",
+    "gesl;": u"\u22db\ufe00",
+    "gesles;": u"\u2a94",
+    "gfr;": u"\U0001d524",
+    "gg;": u"\u226b",
+    "ggg;": u"\u22d9",
+    "gimel;": u"\u2137",
+    "gjcy;": u"\u0453",
+    "gl;": u"\u2277",
+    "glE;": u"\u2a92",
+    "gla;": u"\u2aa5",
+    "glj;": u"\u2aa4",
+    "gnE;": u"\u2269",
+    "gnap;": u"\u2a8a",
+    "gnapprox;": u"\u2a8a",
+    "gne;": u"\u2a88",
+    "gneq;": u"\u2a88",
+    "gneqq;": u"\u2269",
+    "gnsim;": u"\u22e7",
+    "gopf;": u"\U0001d558",
+    "grave;": u"`",
+    "gscr;": u"\u210a",
+    "gsim;": u"\u2273",
+    "gsime;": u"\u2a8e",
+    "gsiml;": u"\u2a90",
+    "gt": u">",
+    "gt;": u">",
+    "gtcc;": u"\u2aa7",
+    "gtcir;": u"\u2a7a",
+    "gtdot;": u"\u22d7",
+    "gtlPar;": u"\u2995",
+    "gtquest;": u"\u2a7c",
+    "gtrapprox;": u"\u2a86",
+    "gtrarr;": u"\u2978",
+    "gtrdot;": u"\u22d7",
+    "gtreqless;": u"\u22db",
+    "gtreqqless;": u"\u2a8c",
+    "gtrless;": u"\u2277",
+    "gtrsim;": u"\u2273",
+    "gvertneqq;": u"\u2269\ufe00",
+    "gvnE;": u"\u2269\ufe00",
+    "hArr;": u"\u21d4",
+    "hairsp;": u"\u200a",
+    "half;": u"\xbd",
+    "hamilt;": u"\u210b",
+    "hardcy;": u"\u044a",
+    "harr;": u"\u2194",
+    "harrcir;": u"\u2948",
+    "harrw;": u"\u21ad",
+    "hbar;": u"\u210f",
+    "hcirc;": u"\u0125",
+    "hearts;": u"\u2665",
+    "heartsuit;": u"\u2665",
+    "hellip;": u"\u2026",
+    "hercon;": u"\u22b9",
+    "hfr;": u"\U0001d525",
+    "hksearow;": u"\u2925",
+    "hkswarow;": u"\u2926",
+    "hoarr;": u"\u21ff",
+    "homtht;": u"\u223b",
+    "hookleftarrow;": u"\u21a9",
+    "hookrightarrow;": u"\u21aa",
+    "hopf;": u"\U0001d559",
+    "horbar;": u"\u2015",
+    "hscr;": u"\U0001d4bd",
+    "hslash;": u"\u210f",
+    "hstrok;": u"\u0127",
+    "hybull;": u"\u2043",
+    "hyphen;": u"\u2010",
+    "iacute": u"\xed",
+    "iacute;": u"\xed",
+    "ic;": u"\u2063",
+    "icirc": u"\xee",
+    "icirc;": u"\xee",
+    "icy;": u"\u0438",
+    "iecy;": u"\u0435",
+    "iexcl": u"\xa1",
+    "iexcl;": u"\xa1",
+    "iff;": u"\u21d4",
+    "ifr;": u"\U0001d526",
+    "igrave": u"\xec",
+    "igrave;": u"\xec",
+    "ii;": u"\u2148",
+    "iiiint;": u"\u2a0c",
+    "iiint;": u"\u222d",
+    "iinfin;": u"\u29dc",
+    "iiota;": u"\u2129",
+    "ijlig;": u"\u0133",
+    "imacr;": u"\u012b",
+    "image;": u"\u2111",
+    "imagline;": u"\u2110",
+    "imagpart;": u"\u2111",
+    "imath;": u"\u0131",
+    "imof;": u"\u22b7",
+    "imped;": u"\u01b5",
+    "in;": u"\u2208",
+    "incare;": u"\u2105",
+    "infin;": u"\u221e",
+    "infintie;": u"\u29dd",
+    "inodot;": u"\u0131",
+    "int;": u"\u222b",
+    "intcal;": u"\u22ba",
+    "integers;": u"\u2124",
+    "intercal;": u"\u22ba",
+    "intlarhk;": u"\u2a17",
+    "intprod;": u"\u2a3c",
+    "iocy;": u"\u0451",
+    "iogon;": u"\u012f",
+    "iopf;": u"\U0001d55a",
+    "iota;": u"\u03b9",
+    "iprod;": u"\u2a3c",
+    "iquest": u"\xbf",
+    "iquest;": u"\xbf",
+    "iscr;": u"\U0001d4be",
+    "isin;": u"\u2208",
+    "isinE;": u"\u22f9",
+    "isindot;": u"\u22f5",
+    "isins;": u"\u22f4",
+    "isinsv;": u"\u22f3",
+    "isinv;": u"\u2208",
+    "it;": u"\u2062",
+    "itilde;": u"\u0129",
+    "iukcy;": u"\u0456",
+    "iuml": u"\xef",
+    "iuml;": u"\xef",
+    "jcirc;": u"\u0135",
+    "jcy;": u"\u0439",
+    "jfr;": u"\U0001d527",
+    "jmath;": u"\u0237",
+    "jopf;": u"\U0001d55b",
+    "jscr;": u"\U0001d4bf",
+    "jsercy;": u"\u0458",
+    "jukcy;": u"\u0454",
+    "kappa;": u"\u03ba",
+    "kappav;": u"\u03f0",
+    "kcedil;": u"\u0137",
+    "kcy;": u"\u043a",
+    "kfr;": u"\U0001d528",
+    "kgreen;": u"\u0138",
+    "khcy;": u"\u0445",
+    "kjcy;": u"\u045c",
+    "kopf;": u"\U0001d55c",
+    "kscr;": u"\U0001d4c0",
+    "lAarr;": u"\u21da",
+    "lArr;": u"\u21d0",
+    "lAtail;": u"\u291b",
+    "lBarr;": u"\u290e",
+    "lE;": u"\u2266",
+    "lEg;": u"\u2a8b",
+    "lHar;": u"\u2962",
+    "lacute;": u"\u013a",
+    "laemptyv;": u"\u29b4",
+    "lagran;": u"\u2112",
+    "lambda;": u"\u03bb",
+    "lang;": u"\u27e8",
+    "langd;": u"\u2991",
+    "langle;": u"\u27e8",
+    "lap;": u"\u2a85",
+    "laquo": u"\xab",
+    "laquo;": u"\xab",
+    "larr;": u"\u2190",
+    "larrb;": u"\u21e4",
+    "larrbfs;": u"\u291f",
+    "larrfs;": u"\u291d",
+    "larrhk;": u"\u21a9",
+    "larrlp;": u"\u21ab",
+    "larrpl;": u"\u2939",
+    "larrsim;": u"\u2973",
+    "larrtl;": u"\u21a2",
+    "lat;": u"\u2aab",
+    "latail;": u"\u2919",
+    "late;": u"\u2aad",
+    "lates;": u"\u2aad\ufe00",
+    "lbarr;": u"\u290c",
+    "lbbrk;": u"\u2772",
+    "lbrace;": u"{",
+    "lbrack;": u"[",
+    "lbrke;": u"\u298b",
+    "lbrksld;": u"\u298f",
+    "lbrkslu;": u"\u298d",
+    "lcaron;": u"\u013e",
+    "lcedil;": u"\u013c",
+    "lceil;": u"\u2308",
+    "lcub;": u"{",
+    "lcy;": u"\u043b",
+    "ldca;": u"\u2936",
+    "ldquo;": u"\u201c",
+    "ldquor;": u"\u201e",
+    "ldrdhar;": u"\u2967",
+    "ldrushar;": u"\u294b",
+    "ldsh;": u"\u21b2",
+    "le;": u"\u2264",
+    "leftarrow;": u"\u2190",
+    "leftarrowtail;": u"\u21a2",
+    "leftharpoondown;": u"\u21bd",
+    "leftharpoonup;": u"\u21bc",
+    "leftleftarrows;": u"\u21c7",
+    "leftrightarrow;": u"\u2194",
+    "leftrightarrows;": u"\u21c6",
+    "leftrightharpoons;": u"\u21cb",
+    "leftrightsquigarrow;": u"\u21ad",
+    "leftthreetimes;": u"\u22cb",
+    "leg;": u"\u22da",
+    "leq;": u"\u2264",
+    "leqq;": u"\u2266",
+    "leqslant;": u"\u2a7d",
+    "les;": u"\u2a7d",
+    "lescc;": u"\u2aa8",
+    "lesdot;": u"\u2a7f",
+    "lesdoto;": u"\u2a81",
+    "lesdotor;": u"\u2a83",
+    "lesg;": u"\u22da\ufe00",
+    "lesges;": u"\u2a93",
+    "lessapprox;": u"\u2a85",
+    "lessdot;": u"\u22d6",
+    "lesseqgtr;": u"\u22da",
+    "lesseqqgtr;": u"\u2a8b",
+    "lessgtr;": u"\u2276",
+    "lesssim;": u"\u2272",
+    "lfisht;": u"\u297c",
+    "lfloor;": u"\u230a",
+    "lfr;": u"\U0001d529",
+    "lg;": u"\u2276",
+    "lgE;": u"\u2a91",
+    "lhard;": u"\u21bd",
+    "lharu;": u"\u21bc",
+    "lharul;": u"\u296a",
+    "lhblk;": u"\u2584",
+    "ljcy;": u"\u0459",
+    "ll;": u"\u226a",
+    "llarr;": u"\u21c7",
+    "llcorner;": u"\u231e",
+    "llhard;": u"\u296b",
+    "lltri;": u"\u25fa",
+    "lmidot;": u"\u0140",
+    "lmoust;": u"\u23b0",
+    "lmoustache;": u"\u23b0",
+    "lnE;": u"\u2268",
+    "lnap;": u"\u2a89",
+    "lnapprox;": u"\u2a89",
+    "lne;": u"\u2a87",
+    "lneq;": u"\u2a87",
+    "lneqq;": u"\u2268",
+    "lnsim;": u"\u22e6",
+    "loang;": u"\u27ec",
+    "loarr;": u"\u21fd",
+    "lobrk;": u"\u27e6",
+    "longleftarrow;": u"\u27f5",
+    "longleftrightarrow;": u"\u27f7",
+    "longmapsto;": u"\u27fc",
+    "longrightarrow;": u"\u27f6",
+    "looparrowleft;": u"\u21ab",
+    "looparrowright;": u"\u21ac",
+    "lopar;": u"\u2985",
+    "lopf;": u"\U0001d55d",
+    "loplus;": u"\u2a2d",
+    "lotimes;": u"\u2a34",
+    "lowast;": u"\u2217",
+    "lowbar;": u"_",
+    "loz;": u"\u25ca",
+    "lozenge;": u"\u25ca",
+    "lozf;": u"\u29eb",
+    "lpar;": u"(",
+    "lparlt;": u"\u2993",
+    "lrarr;": u"\u21c6",
+    "lrcorner;": u"\u231f",
+    "lrhar;": u"\u21cb",
+    "lrhard;": u"\u296d",
+    "lrm;": u"\u200e",
+    "lrtri;": u"\u22bf",
+    "lsaquo;": u"\u2039",
+    "lscr;": u"\U0001d4c1",
+    "lsh;": u"\u21b0",
+    "lsim;": u"\u2272",
+    "lsime;": u"\u2a8d",
+    "lsimg;": u"\u2a8f",
+    "lsqb;": u"[",
+    "lsquo;": u"\u2018",
+    "lsquor;": u"\u201a",
+    "lstrok;": u"\u0142",
+    "lt": u"<",
+    "lt;": u"<",
+    "ltcc;": u"\u2aa6",
+    "ltcir;": u"\u2a79",
+    "ltdot;": u"\u22d6",
+    "lthree;": u"\u22cb",
+    "ltimes;": u"\u22c9",
+    "ltlarr;": u"\u2976",
+    "ltquest;": u"\u2a7b",
+    "ltrPar;": u"\u2996",
+    "ltri;": u"\u25c3",
+    "ltrie;": u"\u22b4",
+    "ltrif;": u"\u25c2",
+    "lurdshar;": u"\u294a",
+    "luruhar;": u"\u2966",
+    "lvertneqq;": u"\u2268\ufe00",
+    "lvnE;": u"\u2268\ufe00",
+    "mDDot;": u"\u223a",
+    "macr": u"\xaf",
+    "macr;": u"\xaf",
+    "male;": u"\u2642",
+    "malt;": u"\u2720",
+    "maltese;": u"\u2720",
+    "map;": u"\u21a6",
+    "mapsto;": u"\u21a6",
+    "mapstodown;": u"\u21a7",
+    "mapstoleft;": u"\u21a4",
+    "mapstoup;": u"\u21a5",
+    "marker;": u"\u25ae",
+    "mcomma;": u"\u2a29",
+    "mcy;": u"\u043c",
+    "mdash;": u"\u2014",
+    "measuredangle;": u"\u2221",
+    "mfr;": u"\U0001d52a",
+    "mho;": u"\u2127",
+    "micro": u"\xb5",
+    "micro;": u"\xb5",
+    "mid;": u"\u2223",
+    "midast;": u"*",
+    "midcir;": u"\u2af0",
+    "middot": u"\xb7",
+    "middot;": u"\xb7",
+    "minus;": u"\u2212",
+    "minusb;": u"\u229f",
+    "minusd;": u"\u2238",
+    "minusdu;": u"\u2a2a",
+    "mlcp;": u"\u2adb",
+    "mldr;": u"\u2026",
+    "mnplus;": u"\u2213",
+    "models;": u"\u22a7",
+    "mopf;": u"\U0001d55e",
+    "mp;": u"\u2213",
+    "mscr;": u"\U0001d4c2",
+    "mstpos;": u"\u223e",
+    "mu;": u"\u03bc",
+    "multimap;": u"\u22b8",
+    "mumap;": u"\u22b8",
+    "nGg;": u"\u22d9\u0338",
+    "nGt;": u"\u226b\u20d2",
+    "nGtv;": u"\u226b\u0338",
+    "nLeftarrow;": u"\u21cd",
+    "nLeftrightarrow;": u"\u21ce",
+    "nLl;": u"\u22d8\u0338",
+    "nLt;": u"\u226a\u20d2",
+    "nLtv;": u"\u226a\u0338",
+    "nRightarrow;": u"\u21cf",
+    "nVDash;": u"\u22af",
+    "nVdash;": u"\u22ae",
+    "nabla;": u"\u2207",
+    "nacute;": u"\u0144",
+    "nang;": u"\u2220\u20d2",
+    "nap;": u"\u2249",
+    "napE;": u"\u2a70\u0338",
+    "napid;": u"\u224b\u0338",
+    "napos;": u"\u0149",
+    "napprox;": u"\u2249",
+    "natur;": u"\u266e",
+    "natural;": u"\u266e",
+    "naturals;": u"\u2115",
+    "nbsp": u"\xa0",
+    "nbsp;": u"\xa0",
+    "nbump;": u"\u224e\u0338",
+    "nbumpe;": u"\u224f\u0338",
+    "ncap;": u"\u2a43",
+    "ncaron;": u"\u0148",
+    "ncedil;": u"\u0146",
+    "ncong;": u"\u2247",
+    "ncongdot;": u"\u2a6d\u0338",
+    "ncup;": u"\u2a42",
+    "ncy;": u"\u043d",
+    "ndash;": u"\u2013",
+    "ne;": u"\u2260",
+    "neArr;": u"\u21d7",
+    "nearhk;": u"\u2924",
+    "nearr;": u"\u2197",
+    "nearrow;": u"\u2197",
+    "nedot;": u"\u2250\u0338",
+    "nequiv;": u"\u2262",
+    "nesear;": u"\u2928",
+    "nesim;": u"\u2242\u0338",
+    "nexist;": u"\u2204",
+    "nexists;": u"\u2204",
+    "nfr;": u"\U0001d52b",
+    "ngE;": u"\u2267\u0338",
+    "nge;": u"\u2271",
+    "ngeq;": u"\u2271",
+    "ngeqq;": u"\u2267\u0338",
+    "ngeqslant;": u"\u2a7e\u0338",
+    "nges;": u"\u2a7e\u0338",
+    "ngsim;": u"\u2275",
+    "ngt;": u"\u226f",
+    "ngtr;": u"\u226f",
+    "nhArr;": u"\u21ce",
+    "nharr;": u"\u21ae",
+    "nhpar;": u"\u2af2",
+    "ni;": u"\u220b",
+    "nis;": u"\u22fc",
+    "nisd;": u"\u22fa",
+    "niv;": u"\u220b",
+    "njcy;": u"\u045a",
+    "nlArr;": u"\u21cd",
+    "nlE;": u"\u2266\u0338",
+    "nlarr;": u"\u219a",
+    "nldr;": u"\u2025",
+    "nle;": u"\u2270",
+    "nleftarrow;": u"\u219a",
+    "nleftrightarrow;": u"\u21ae",
+    "nleq;": u"\u2270",
+    "nleqq;": u"\u2266\u0338",
+    "nleqslant;": u"\u2a7d\u0338",
+    "nles;": u"\u2a7d\u0338",
+    "nless;": u"\u226e",
+    "nlsim;": u"\u2274",
+    "nlt;": u"\u226e",
+    "nltri;": u"\u22ea",
+    "nltrie;": u"\u22ec",
+    "nmid;": u"\u2224",
+    "nopf;": u"\U0001d55f",
+    "not": u"\xac",
+    "not;": u"\xac",
+    "notin;": u"\u2209",
+    "notinE;": u"\u22f9\u0338",
+    "notindot;": u"\u22f5\u0338",
+    "notinva;": u"\u2209",
+    "notinvb;": u"\u22f7",
+    "notinvc;": u"\u22f6",
+    "notni;": u"\u220c",
+    "notniva;": u"\u220c",
+    "notnivb;": u"\u22fe",
+    "notnivc;": u"\u22fd",
+    "npar;": u"\u2226",
+    "nparallel;": u"\u2226",
+    "nparsl;": u"\u2afd\u20e5",
+    "npart;": u"\u2202\u0338",
+    "npolint;": u"\u2a14",
+    "npr;": u"\u2280",
+    "nprcue;": u"\u22e0",
+    "npre;": u"\u2aaf\u0338",
+    "nprec;": u"\u2280",
+    "npreceq;": u"\u2aaf\u0338",
+    "nrArr;": u"\u21cf",
+    "nrarr;": u"\u219b",
+    "nrarrc;": u"\u2933\u0338",
+    "nrarrw;": u"\u219d\u0338",
+    "nrightarrow;": u"\u219b",
+    "nrtri;": u"\u22eb",
+    "nrtrie;": u"\u22ed",
+    "nsc;": u"\u2281",
+    "nsccue;": u"\u22e1",
+    "nsce;": u"\u2ab0\u0338",
+    "nscr;": u"\U0001d4c3",
+    "nshortmid;": u"\u2224",
+    "nshortparallel;": u"\u2226",
+    "nsim;": u"\u2241",
+    "nsime;": u"\u2244",
+    "nsimeq;": u"\u2244",
+    "nsmid;": u"\u2224",
+    "nspar;": u"\u2226",
+    "nsqsube;": u"\u22e2",
+    "nsqsupe;": u"\u22e3",
+    "nsub;": u"\u2284",
+    "nsubE;": u"\u2ac5\u0338",
+    "nsube;": u"\u2288",
+    "nsubset;": u"\u2282\u20d2",
+    "nsubseteq;": u"\u2288",
+    "nsubseteqq;": u"\u2ac5\u0338",
+    "nsucc;": u"\u2281",
+    "nsucceq;": u"\u2ab0\u0338",
+    "nsup;": u"\u2285",
+    "nsupE;": u"\u2ac6\u0338",
+    "nsupe;": u"\u2289",
+    "nsupset;": u"\u2283\u20d2",
+    "nsupseteq;": u"\u2289",
+    "nsupseteqq;": u"\u2ac6\u0338",
+    "ntgl;": u"\u2279",
+    "ntilde": u"\xf1",
+    "ntilde;": u"\xf1",
+    "ntlg;": u"\u2278",
+    "ntriangleleft;": u"\u22ea",
+    "ntrianglelefteq;": u"\u22ec",
+    "ntriangleright;": u"\u22eb",
+    "ntrianglerighteq;": u"\u22ed",
+    "nu;": u"\u03bd",
+    "num;": u"#",
+    "numero;": u"\u2116",
+    "numsp;": u"\u2007",
+    "nvDash;": u"\u22ad",
+    "nvHarr;": u"\u2904",
+    "nvap;": u"\u224d\u20d2",
+    "nvdash;": u"\u22ac",
+    "nvge;": u"\u2265\u20d2",
+    "nvgt;": u">\u20d2",
+    "nvinfin;": u"\u29de",
+    "nvlArr;": u"\u2902",
+    "nvle;": u"\u2264\u20d2",
+    "nvlt;": u"<\u20d2",
+    "nvltrie;": u"\u22b4\u20d2",
+    "nvrArr;": u"\u2903",
+    "nvrtrie;": u"\u22b5\u20d2",
+    "nvsim;": u"\u223c\u20d2",
+    "nwArr;": u"\u21d6",
+    "nwarhk;": u"\u2923",
+    "nwarr;": u"\u2196",
+    "nwarrow;": u"\u2196",
+    "nwnear;": u"\u2927",
+    "oS;": u"\u24c8",
+    "oacute": u"\xf3",
+    "oacute;": u"\xf3",
+    "oast;": u"\u229b",
+    "ocir;": u"\u229a",
+    "ocirc": u"\xf4",
+    "ocirc;": u"\xf4",
+    "ocy;": u"\u043e",
+    "odash;": u"\u229d",
+    "odblac;": u"\u0151",
+    "odiv;": u"\u2a38",
+    "odot;": u"\u2299",
+    "odsold;": u"\u29bc",
+    "oelig;": u"\u0153",
+    "ofcir;": u"\u29bf",
+    "ofr;": u"\U0001d52c",
+    "ogon;": u"\u02db",
+    "ograve": u"\xf2",
+    "ograve;": u"\xf2",
+    "ogt;": u"\u29c1",
+    "ohbar;": u"\u29b5",
+    "ohm;": u"\u03a9",
+    "oint;": u"\u222e",
+    "olarr;": u"\u21ba",
+    "olcir;": u"\u29be",
+    "olcross;": u"\u29bb",
+    "oline;": u"\u203e",
+    "olt;": u"\u29c0",
+    "omacr;": u"\u014d",
+    "omega;": u"\u03c9",
+    "omicron;": u"\u03bf",
+    "omid;": u"\u29b6",
+    "ominus;": u"\u2296",
+    "oopf;": u"\U0001d560",
+    "opar;": u"\u29b7",
+    "operp;": u"\u29b9",
+    "oplus;": u"\u2295",
+    "or;": u"\u2228",
+    "orarr;": u"\u21bb",
+    "ord;": u"\u2a5d",
+    "order;": u"\u2134",
+    "orderof;": u"\u2134",
+    "ordf": u"\xaa",
+    "ordf;": u"\xaa",
+    "ordm": u"\xba",
+    "ordm;": u"\xba",
+    "origof;": u"\u22b6",
+    "oror;": u"\u2a56",
+    "orslope;": u"\u2a57",
+    "orv;": u"\u2a5b",
+    "oscr;": u"\u2134",
+    "oslash": u"\xf8",
+    "oslash;": u"\xf8",
+    "osol;": u"\u2298",
+    "otilde": u"\xf5",
+    "otilde;": u"\xf5",
+    "otimes;": u"\u2297",
+    "otimesas;": u"\u2a36",
+    "ouml": u"\xf6",
+    "ouml;": u"\xf6",
+    "ovbar;": u"\u233d",
+    "par;": u"\u2225",
+    "para": u"\xb6",
+    "para;": u"\xb6",
+    "parallel;": u"\u2225",
+    "parsim;": u"\u2af3",
+    "parsl;": u"\u2afd",
+    "part;": u"\u2202",
+    "pcy;": u"\u043f",
+    "percnt;": u"%",
+    "period;": u".",
+    "permil;": u"\u2030",
+    "perp;": u"\u22a5",
+    "pertenk;": u"\u2031",
+    "pfr;": u"\U0001d52d",
+    "phi;": u"\u03c6",
+    "phiv;": u"\u03d5",
+    "phmmat;": u"\u2133",
+    "phone;": u"\u260e",
+    "pi;": u"\u03c0",
+    "pitchfork;": u"\u22d4",
+    "piv;": u"\u03d6",
+    "planck;": u"\u210f",
+    "planckh;": u"\u210e",
+    "plankv;": u"\u210f",
+    "plus;": u"+",
+    "plusacir;": u"\u2a23",
+    "plusb;": u"\u229e",
+    "pluscir;": u"\u2a22",
+    "plusdo;": u"\u2214",
+    "plusdu;": u"\u2a25",
+    "pluse;": u"\u2a72",
+    "plusmn": u"\xb1",
+    "plusmn;": u"\xb1",
+    "plussim;": u"\u2a26",
+    "plustwo;": u"\u2a27",
+    "pm;": u"\xb1",
+    "pointint;": u"\u2a15",
+    "popf;": u"\U0001d561",
+    "pound": u"\xa3",
+    "pound;": u"\xa3",
+    "pr;": u"\u227a",
+    "prE;": u"\u2ab3",
+    "prap;": u"\u2ab7",
+    "prcue;": u"\u227c",
+    "pre;": u"\u2aaf",
+    "prec;": u"\u227a",
+    "precapprox;": u"\u2ab7",
+    "preccurlyeq;": u"\u227c",
+    "preceq;": u"\u2aaf",
+    "precnapprox;": u"\u2ab9",
+    "precneqq;": u"\u2ab5",
+    "precnsim;": u"\u22e8",
+    "precsim;": u"\u227e",
+    "prime;": u"\u2032",
+    "primes;": u"\u2119",
+    "prnE;": u"\u2ab5",
+    "prnap;": u"\u2ab9",
+    "prnsim;": u"\u22e8",
+    "prod;": u"\u220f",
+    "profalar;": u"\u232e",
+    "profline;": u"\u2312",
+    "profsurf;": u"\u2313",
+    "prop;": u"\u221d",
+    "propto;": u"\u221d",
+    "prsim;": u"\u227e",
+    "prurel;": u"\u22b0",
+    "pscr;": u"\U0001d4c5",
+    "psi;": u"\u03c8",
+    "puncsp;": u"\u2008",
+    "qfr;": u"\U0001d52e",
+    "qint;": u"\u2a0c",
+    "qopf;": u"\U0001d562",
+    "qprime;": u"\u2057",
+    "qscr;": u"\U0001d4c6",
+    "quaternions;": u"\u210d",
+    "quatint;": u"\u2a16",
+    "quest;": u"?",
+    "questeq;": u"\u225f",
+    "quot": u"\"",
+    "quot;": u"\"",
+    "rAarr;": u"\u21db",
+    "rArr;": u"\u21d2",
+    "rAtail;": u"\u291c",
+    "rBarr;": u"\u290f",
+    "rHar;": u"\u2964",
+    "race;": u"\u223d\u0331",
+    "racute;": u"\u0155",
+    "radic;": u"\u221a",
+    "raemptyv;": u"\u29b3",
+    "rang;": u"\u27e9",
+    "rangd;": u"\u2992",
+    "range;": u"\u29a5",
+    "rangle;": u"\u27e9",
+    "raquo": u"\xbb",
+    "raquo;": u"\xbb",
+    "rarr;": u"\u2192",
+    "rarrap;": u"\u2975",
+    "rarrb;": u"\u21e5",
+    "rarrbfs;": u"\u2920",
+    "rarrc;": u"\u2933",
+    "rarrfs;": u"\u291e",
+    "rarrhk;": u"\u21aa",
+    "rarrlp;": u"\u21ac",
+    "rarrpl;": u"\u2945",
+    "rarrsim;": u"\u2974",
+    "rarrtl;": u"\u21a3",
+    "rarrw;": u"\u219d",
+    "ratail;": u"\u291a",
+    "ratio;": u"\u2236",
+    "rationals;": u"\u211a",
+    "rbarr;": u"\u290d",
+    "rbbrk;": u"\u2773",
+    "rbrace;": u"}",
+    "rbrack;": u"]",
+    "rbrke;": u"\u298c",
+    "rbrksld;": u"\u298e",
+    "rbrkslu;": u"\u2990",
+    "rcaron;": u"\u0159",
+    "rcedil;": u"\u0157",
+    "rceil;": u"\u2309",
+    "rcub;": u"}",
+    "rcy;": u"\u0440",
+    "rdca;": u"\u2937",
+    "rdldhar;": u"\u2969",
+    "rdquo;": u"\u201d",
+    "rdquor;": u"\u201d",
+    "rdsh;": u"\u21b3",
+    "real;": u"\u211c",
+    "realine;": u"\u211b",
+    "realpart;": u"\u211c",
+    "reals;": u"\u211d",
+    "rect;": u"\u25ad",
+    "reg": u"\xae",
+    "reg;": u"\xae",
+    "rfisht;": u"\u297d",
+    "rfloor;": u"\u230b",
+    "rfr;": u"\U0001d52f",
+    "rhard;": u"\u21c1",
+    "rharu;": u"\u21c0",
+    "rharul;": u"\u296c",
+    "rho;": u"\u03c1",
+    "rhov;": u"\u03f1",
+    "rightarrow;": u"\u2192",
+    "rightarrowtail;": u"\u21a3",
+    "rightharpoondown;": u"\u21c1",
+    "rightharpoonup;": u"\u21c0",
+    "rightleftarrows;": u"\u21c4",
+    "rightleftharpoons;": u"\u21cc",
+    "rightrightarrows;": u"\u21c9",
+    "rightsquigarrow;": u"\u219d",
+    "rightthreetimes;": u"\u22cc",
+    "ring;": u"\u02da",
+    "risingdotseq;": u"\u2253",
+    "rlarr;": u"\u21c4",
+    "rlhar;": u"\u21cc",
+    "rlm;": u"\u200f",
+    "rmoust;": u"\u23b1",
+    "rmoustache;": u"\u23b1",
+    "rnmid;": u"\u2aee",
+    "roang;": u"\u27ed",
+    "roarr;": u"\u21fe",
+    "robrk;": u"\u27e7",
+    "ropar;": u"\u2986",
+    "ropf;": u"\U0001d563",
+    "roplus;": u"\u2a2e",
+    "rotimes;": u"\u2a35",
+    "rpar;": u")",
+    "rpargt;": u"\u2994",
+    "rppolint;": u"\u2a12",
+    "rrarr;": u"\u21c9",
+    "rsaquo;": u"\u203a",
+    "rscr;": u"\U0001d4c7",
+    "rsh;": u"\u21b1",
+    "rsqb;": u"]",
+    "rsquo;": u"\u2019",
+    "rsquor;": u"\u2019",
+    "rthree;": u"\u22cc",
+    "rtimes;": u"\u22ca",
+    "rtri;": u"\u25b9",
+    "rtrie;": u"\u22b5",
+    "rtrif;": u"\u25b8",
+    "rtriltri;": u"\u29ce",
+    "ruluhar;": u"\u2968",
+    "rx;": u"\u211e",
+    "sacute;": u"\u015b",
+    "sbquo;": u"\u201a",
+    "sc;": u"\u227b",
+    "scE;": u"\u2ab4",
+    "scap;": u"\u2ab8",
+    "scaron;": u"\u0161",
+    "sccue;": u"\u227d",
+    "sce;": u"\u2ab0",
+    "scedil;": u"\u015f",
+    "scirc;": u"\u015d",
+    "scnE;": u"\u2ab6",
+    "scnap;": u"\u2aba",
+    "scnsim;": u"\u22e9",
+    "scpolint;": u"\u2a13",
+    "scsim;": u"\u227f",
+    "scy;": u"\u0441",
+    "sdot;": u"\u22c5",
+    "sdotb;": u"\u22a1",
+    "sdote;": u"\u2a66",
+    "seArr;": u"\u21d8",
+    "searhk;": u"\u2925",
+    "searr;": u"\u2198",
+    "searrow;": u"\u2198",
+    "sect": u"\xa7",
+    "sect;": u"\xa7",
+    "semi;": u";",
+    "seswar;": u"\u2929",
+    "setminus;": u"\u2216",
+    "setmn;": u"\u2216",
+    "sext;": u"\u2736",
+    "sfr;": u"\U0001d530",
+    "sfrown;": u"\u2322",
+    "sharp;": u"\u266f",
+    "shchcy;": u"\u0449",
+    "shcy;": u"\u0448",
+    "shortmid;": u"\u2223",
+    "shortparallel;": u"\u2225",
+    "shy": u"\xad",
+    "shy;": u"\xad",
+    "sigma;": u"\u03c3",
+    "sigmaf;": u"\u03c2",
+    "sigmav;": u"\u03c2",
+    "sim;": u"\u223c",
+    "simdot;": u"\u2a6a",
+    "sime;": u"\u2243",
+    "simeq;": u"\u2243",
+    "simg;": u"\u2a9e",
+    "simgE;": u"\u2aa0",
+    "siml;": u"\u2a9d",
+    "simlE;": u"\u2a9f",
+    "simne;": u"\u2246",
+    "simplus;": u"\u2a24",
+    "simrarr;": u"\u2972",
+    "slarr;": u"\u2190",
+    "smallsetminus;": u"\u2216",
+    "smashp;": u"\u2a33",
+    "smeparsl;": u"\u29e4",
+    "smid;": u"\u2223",
+    "smile;": u"\u2323",
+    "smt;": u"\u2aaa",
+    "smte;": u"\u2aac",
+    "smtes;": u"\u2aac\ufe00",
+    "softcy;": u"\u044c",
+    "sol;": u"/",
+    "solb;": u"\u29c4",
+    "solbar;": u"\u233f",
+    "sopf;": u"\U0001d564",
+    "spades;": u"\u2660",
+    "spadesuit;": u"\u2660",
+    "spar;": u"\u2225",
+    "sqcap;": u"\u2293",
+    "sqcaps;": u"\u2293\ufe00",
+    "sqcup;": u"\u2294",
+    "sqcups;": u"\u2294\ufe00",
+    "sqsub;": u"\u228f",
+    "sqsube;": u"\u2291",
+    "sqsubset;": u"\u228f",
+    "sqsubseteq;": u"\u2291",
+    "sqsup;": u"\u2290",
+    "sqsupe;": u"\u2292",
+    "sqsupset;": u"\u2290",
+    "sqsupseteq;": u"\u2292",
+    "squ;": u"\u25a1",
+    "square;": u"\u25a1",
+    "squarf;": u"\u25aa",
+    "squf;": u"\u25aa",
+    "srarr;": u"\u2192",
+    "sscr;": u"\U0001d4c8",
+    "ssetmn;": u"\u2216",
+    "ssmile;": u"\u2323",
+    "sstarf;": u"\u22c6",
+    "star;": u"\u2606",
+    "starf;": u"\u2605",
+    "straightepsilon;": u"\u03f5",
+    "straightphi;": u"\u03d5",
+    "strns;": u"\xaf",
+    "sub;": u"\u2282",
+    "subE;": u"\u2ac5",
+    "subdot;": u"\u2abd",
+    "sube;": u"\u2286",
+    "subedot;": u"\u2ac3",
+    "submult;": u"\u2ac1",
+    "subnE;": u"\u2acb",
+    "subne;": u"\u228a",
+    "subplus;": u"\u2abf",
+    "subrarr;": u"\u2979",
+    "subset;": u"\u2282",
+    "subseteq;": u"\u2286",
+    "subseteqq;": u"\u2ac5",
+    "subsetneq;": u"\u228a",
+    "subsetneqq;": u"\u2acb",
+    "subsim;": u"\u2ac7",
+    "subsub;": u"\u2ad5",
+    "subsup;": u"\u2ad3",
+    "succ;": u"\u227b",
+    "succapprox;": u"\u2ab8",
+    "succcurlyeq;": u"\u227d",
+    "succeq;": u"\u2ab0",
+    "succnapprox;": u"\u2aba",
+    "succneqq;": u"\u2ab6",
+    "succnsim;": u"\u22e9",
+    "succsim;": u"\u227f",
+    "sum;": u"\u2211",
+    "sung;": u"\u266a",
+    "sup1": u"\xb9",
+    "sup1;": u"\xb9",
+    "sup2": u"\xb2",
+    "sup2;": u"\xb2",
+    "sup3": u"\xb3",
+    "sup3;": u"\xb3",
+    "sup;": u"\u2283",
+    "supE;": u"\u2ac6",
+    "supdot;": u"\u2abe",
+    "supdsub;": u"\u2ad8",
+    "supe;": u"\u2287",
+    "supedot;": u"\u2ac4",
+    "suphsol;": u"\u27c9",
+    "suphsub;": u"\u2ad7",
+    "suplarr;": u"\u297b",
+    "supmult;": u"\u2ac2",
+    "supnE;": u"\u2acc",
+    "supne;": u"\u228b",
+    "supplus;": u"\u2ac0",
+    "supset;": u"\u2283",
+    "supseteq;": u"\u2287",
+    "supseteqq;": u"\u2ac6",
+    "supsetneq;": u"\u228b",
+    "supsetneqq;": u"\u2acc",
+    "supsim;": u"\u2ac8",
+    "supsub;": u"\u2ad4",
+    "supsup;": u"\u2ad6",
+    "swArr;": u"\u21d9",
+    "swarhk;": u"\u2926",
+    "swarr;": u"\u2199",
+    "swarrow;": u"\u2199",
+    "swnwar;": u"\u292a",
+    "szlig": u"\xdf",
+    "szlig;": u"\xdf",
+    "target;": u"\u2316",
+    "tau;": u"\u03c4",
+    "tbrk;": u"\u23b4",
+    "tcaron;": u"\u0165",
+    "tcedil;": u"\u0163",
+    "tcy;": u"\u0442",
+    "tdot;": u"\u20db",
+    "telrec;": u"\u2315",
+    "tfr;": u"\U0001d531",
+    "there4;": u"\u2234",
+    "therefore;": u"\u2234",
+    "theta;": u"\u03b8",
+    "thetasym;": u"\u03d1",
+    "thetav;": u"\u03d1",
+    "thickapprox;": u"\u2248",
+    "thicksim;": u"\u223c",
+    "thinsp;": u"\u2009",
+    "thkap;": u"\u2248",
+    "thksim;": u"\u223c",
+    "thorn": u"\xfe",
+    "thorn;": u"\xfe",
+    "tilde;": u"\u02dc",
+    "times": u"\xd7",
+    "times;": u"\xd7",
+    "timesb;": u"\u22a0",
+    "timesbar;": u"\u2a31",
+    "timesd;": u"\u2a30",
+    "tint;": u"\u222d",
+    "toea;": u"\u2928",
+    "top;": u"\u22a4",
+    "topbot;": u"\u2336",
+    "topcir;": u"\u2af1",
+    "topf;": u"\U0001d565",
+    "topfork;": u"\u2ada",
+    "tosa;": u"\u2929",
+    "tprime;": u"\u2034",
+    "trade;": u"\u2122",
+    "triangle;": u"\u25b5",
+    "triangledown;": u"\u25bf",
+    "triangleleft;": u"\u25c3",
+    "trianglelefteq;": u"\u22b4",
+    "triangleq;": u"\u225c",
+    "triangleright;": u"\u25b9",
+    "trianglerighteq;": u"\u22b5",
+    "tridot;": u"\u25ec",
+    "trie;": u"\u225c",
+    "triminus;": u"\u2a3a",
+    "triplus;": u"\u2a39",
+    "trisb;": u"\u29cd",
+    "tritime;": u"\u2a3b",
+    "trpezium;": u"\u23e2",
+    "tscr;": u"\U0001d4c9",
+    "tscy;": u"\u0446",
+    "tshcy;": u"\u045b",
+    "tstrok;": u"\u0167",
+    "twixt;": u"\u226c",
+    "twoheadleftarrow;": u"\u219e",
+    "twoheadrightarrow;": u"\u21a0",
+    "uArr;": u"\u21d1",
+    "uHar;": u"\u2963",
+    "uacute": u"\xfa",
+    "uacute;": u"\xfa",
+    "uarr;": u"\u2191",
+    "ubrcy;": u"\u045e",
+    "ubreve;": u"\u016d",
+    "ucirc": u"\xfb",
+    "ucirc;": u"\xfb",
+    "ucy;": u"\u0443",
+    "udarr;": u"\u21c5",
+    "udblac;": u"\u0171",
+    "udhar;": u"\u296e",
+    "ufisht;": u"\u297e",
+    "ufr;": u"\U0001d532",
+    "ugrave": u"\xf9",
+    "ugrave;": u"\xf9",
+    "uharl;": u"\u21bf",
+    "uharr;": u"\u21be",
+    "uhblk;": u"\u2580",
+    "ulcorn;": u"\u231c",
+    "ulcorner;": u"\u231c",
+    "ulcrop;": u"\u230f",
+    "ultri;": u"\u25f8",
+    "umacr;": u"\u016b",
+    "uml": u"\xa8",
+    "uml;": u"\xa8",
+    "uogon;": u"\u0173",
+    "uopf;": u"\U0001d566",
+    "uparrow;": u"\u2191",
+    "updownarrow;": u"\u2195",
+    "upharpoonleft;": u"\u21bf",
+    "upharpoonright;": u"\u21be",
+    "uplus;": u"\u228e",
+    "upsi;": u"\u03c5",
+    "upsih;": u"\u03d2",
+    "upsilon;": u"\u03c5",
+    "upuparrows;": u"\u21c8",
+    "urcorn;": u"\u231d",
+    "urcorner;": u"\u231d",
+    "urcrop;": u"\u230e",
+    "uring;": u"\u016f",
+    "urtri;": u"\u25f9",
+    "uscr;": u"\U0001d4ca",
+    "utdot;": u"\u22f0",
+    "utilde;": u"\u0169",
+    "utri;": u"\u25b5",
+    "utrif;": u"\u25b4",
+    "uuarr;": u"\u21c8",
+    "uuml": u"\xfc",
+    "uuml;": u"\xfc",
+    "uwangle;": u"\u29a7",
+    "vArr;": u"\u21d5",
+    "vBar;": u"\u2ae8",
+    "vBarv;": u"\u2ae9",
+    "vDash;": u"\u22a8",
+    "vangrt;": u"\u299c",
+    "varepsilon;": u"\u03f5",
+    "varkappa;": u"\u03f0",
+    "varnothing;": u"\u2205",
+    "varphi;": u"\u03d5",
+    "varpi;": u"\u03d6",
+    "varpropto;": u"\u221d",
+    "varr;": u"\u2195",
+    "varrho;": u"\u03f1",
+    "varsigma;": u"\u03c2",
+    "varsubsetneq;": u"\u228a\ufe00",
+    "varsubsetneqq;": u"\u2acb\ufe00",
+    "varsupsetneq;": u"\u228b\ufe00",
+    "varsupsetneqq;": u"\u2acc\ufe00",
+    "vartheta;": u"\u03d1",
+    "vartriangleleft;": u"\u22b2",
+    "vartriangleright;": u"\u22b3",
+    "vcy;": u"\u0432",
+    "vdash;": u"\u22a2",
+    "vee;": u"\u2228",
+    "veebar;": u"\u22bb",
+    "veeeq;": u"\u225a",
+    "vellip;": u"\u22ee",
+    "verbar;": u"|",
+    "vert;": u"|",
+    "vfr;": u"\U0001d533",
+    "vltri;": u"\u22b2",
+    "vnsub;": u"\u2282\u20d2",
+    "vnsup;": u"\u2283\u20d2",
+    "vopf;": u"\U0001d567",
+    "vprop;": u"\u221d",
+    "vrtri;": u"\u22b3",
+    "vscr;": u"\U0001d4cb",
+    "vsubnE;": u"\u2acb\ufe00",
+    "vsubne;": u"\u228a\ufe00",
+    "vsupnE;": u"\u2acc\ufe00",
+    "vsupne;": u"\u228b\ufe00",
+    "vzigzag;": u"\u299a",
+    "wcirc;": u"\u0175",
+    "wedbar;": u"\u2a5f",
+    "wedge;": u"\u2227",
+    "wedgeq;": u"\u2259",
+    "weierp;": u"\u2118",
+    "wfr;": u"\U0001d534",
+    "wopf;": u"\U0001d568",
+    "wp;": u"\u2118",
+    "wr;": u"\u2240",
+    "wreath;": u"\u2240",
+    "wscr;": u"\U0001d4cc",
+    "xcap;": u"\u22c2",
+    "xcirc;": u"\u25ef",
+    "xcup;": u"\u22c3",
+    "xdtri;": u"\u25bd",
+    "xfr;": u"\U0001d535",
+    "xhArr;": u"\u27fa",
+    "xharr;": u"\u27f7",
+    "xi;": u"\u03be",
+    "xlArr;": u"\u27f8",
+    "xlarr;": u"\u27f5",
+    "xmap;": u"\u27fc",
+    "xnis;": u"\u22fb",
+    "xodot;": u"\u2a00",
+    "xopf;": u"\U0001d569",
+    "xoplus;": u"\u2a01",
+    "xotime;": u"\u2a02",
+    "xrArr;": u"\u27f9",
+    "xrarr;": u"\u27f6",
+    "xscr;": u"\U0001d4cd",
+    "xsqcup;": u"\u2a06",
+    "xuplus;": u"\u2a04",
+    "xutri;": u"\u25b3",
+    "xvee;": u"\u22c1",
+    "xwedge;": u"\u22c0",
+    "yacute": u"\xfd",
+    "yacute;": u"\xfd",
+    "yacy;": u"\u044f",
+    "ycirc;": u"\u0177",
+    "ycy;": u"\u044b",
+    "yen": u"\xa5",
+    "yen;": u"\xa5",
+    "yfr;": u"\U0001d536",
+    "yicy;": u"\u0457",
+    "yopf;": u"\U0001d56a",
+    "yscr;": u"\U0001d4ce",
+    "yucy;": u"\u044e",
+    "yuml": u"\xff",
+    "yuml;": u"\xff",
+    "zacute;": u"\u017a",
+    "zcaron;": u"\u017e",
+    "zcy;": u"\u0437",
+    "zdot;": u"\u017c",
+    "zeetrf;": u"\u2128",
+    "zeta;": u"\u03b6",
+    "zfr;": u"\U0001d537",
+    "zhcy;": u"\u0436",
+    "zigrarr;": u"\u21dd",
+    "zopf;": u"\U0001d56b",
+    "zscr;": u"\U0001d4cf",
+    "zwj;": u"\u200d",
+    "zwnj;": u"\u200c",
+}
+
+# Substitutes for code points 0x00, 0x0D and 0x80-0x9F (the latter follow Windows-1252).
+replacementCharacters = {
+    0x0:u"\uFFFD",
+    0x0d:u"\u000D",
+    0x80:u"\u20AC",
+    0x81:u"\u0081",
+    0x82:u"\u201A",
+    0x83:u"\u0192",
+    0x84:u"\u201E",
+    0x85:u"\u2026",
+    0x86:u"\u2020",
+    0x87:u"\u2021",
+    0x88:u"\u02C6",
+    0x89:u"\u2030",
+    0x8A:u"\u0160",
+    0x8B:u"\u2039",
+    0x8C:u"\u0152",
+    0x8D:u"\u008D",
+    0x8E:u"\u017D",
+    0x8F:u"\u008F",
+    0x90:u"\u0090",
+    0x91:u"\u2018",
+    0x92:u"\u2019",
+    0x93:u"\u201C",
+    0x94:u"\u201D",
+    0x95:u"\u2022",
+    0x96:u"\u2013",
+    0x97:u"\u2014",
+    0x98:u"\u02DC",
+    0x99:u"\u2122",
+    0x9A:u"\u0161",
+    0x9B:u"\u203A",
+    0x9C:u"\u0153",
+    0x9D:u"\u009D",
+    0x9E:u"\u017E",
+    0x9F:u"\u0178",
+}
+
+encodings = {  # encoding alias -> codec name; NOTE(review): 'x-x-big5' is the only hyphenated key
+    '437': 'cp437',
+    '850': 'cp850',
+    '852': 'cp852',
+    '855': 'cp855',
+    '857': 'cp857',
+    '860': 'cp860',
+    '861': 'cp861',
+    '862': 'cp862',
+    '863': 'cp863',
+    '865': 'cp865',
+    '866': 'cp866',
+    '869': 'cp869',
+    'ansix341968': 'ascii',
+    'ansix341986': 'ascii',
+    'arabic': 'iso8859-6',
+    'ascii': 'ascii',
+    'asmo708': 'iso8859-6',
+    'big5': 'big5',
+    'big5hkscs': 'big5hkscs',
+    'chinese': 'gbk',
+    'cp037': 'cp037',
+    'cp1026': 'cp1026',
+    'cp154': 'ptcp154',
+    'cp367': 'ascii',
+    'cp424': 'cp424',
+    'cp437': 'cp437',
+    'cp500': 'cp500',
+    'cp775': 'cp775',
+    'cp819': 'windows-1252',
+    'cp850': 'cp850',
+    'cp852': 'cp852',
+    'cp855': 'cp855',
+    'cp857': 'cp857',
+    'cp860': 'cp860',
+    'cp861': 'cp861',
+    'cp862': 'cp862',
+    'cp863': 'cp863',
+    'cp864': 'cp864',
+    'cp865': 'cp865',
+    'cp866': 'cp866',
+    'cp869': 'cp869',
+    'cp936': 'gbk',
+    'cpgr': 'cp869',
+    'cpis': 'cp861',
+    'csascii': 'ascii',
+    'csbig5': 'big5',
+    'cseuckr': 'cp949',
+    'cseucpkdfmtjapanese': 'euc_jp',
+    'csgb2312': 'gbk',
+    'cshproman8': 'hp-roman8',
+    'csibm037': 'cp037',
+    'csibm1026': 'cp1026',
+    'csibm424': 'cp424',
+    'csibm500': 'cp500',
+    'csibm855': 'cp855',
+    'csibm857': 'cp857',
+    'csibm860': 'cp860',
+    'csibm861': 'cp861',
+    'csibm863': 'cp863',
+    'csibm864': 'cp864',
+    'csibm865': 'cp865',
+    'csibm866': 'cp866',
+    'csibm869': 'cp869',
+    'csiso2022jp': 'iso2022_jp',
+    'csiso2022jp2': 'iso2022_jp_2',
+    'csiso2022kr': 'iso2022_kr',
+    'csiso58gb231280': 'gbk',
+    'csisolatin1': 'windows-1252',
+    'csisolatin2': 'iso8859-2',
+    'csisolatin3': 'iso8859-3',
+    'csisolatin4': 'iso8859-4',
+    'csisolatin5': 'windows-1254',
+    'csisolatin6': 'iso8859-10',
+    'csisolatinarabic': 'iso8859-6',
+    'csisolatincyrillic': 'iso8859-5',
+    'csisolatingreek': 'iso8859-7',
+    'csisolatinhebrew': 'iso8859-8',
+    'cskoi8r': 'koi8-r',
+    'csksc56011987': 'cp949',
+    'cspc775baltic': 'cp775',
+    'cspc850multilingual': 'cp850',
+    'cspc862latinhebrew': 'cp862',
+    'cspc8codepage437': 'cp437',
+    'cspcp852': 'cp852',
+    'csptcp154': 'ptcp154',
+    'csshiftjis': 'shift_jis',
+    'csunicode11utf7': 'utf-7',
+    'cyrillic': 'iso8859-5',
+    'cyrillicasian': 'ptcp154',
+    'ebcdiccpbe': 'cp500',
+    'ebcdiccpca': 'cp037',
+    'ebcdiccpch': 'cp500',
+    'ebcdiccphe': 'cp424',
+    'ebcdiccpnl': 'cp037',
+    'ebcdiccpus': 'cp037',
+    'ebcdiccpwt': 'cp037',
+    'ecma114': 'iso8859-6',
+    'ecma118': 'iso8859-7',
+    'elot928': 'iso8859-7',
+    'eucjp': 'euc_jp',
+    'euckr': 'cp949',
+    'extendedunixcodepackedformatforjapanese': 'euc_jp',
+    'gb18030': 'gb18030',
+    'gb2312': 'gbk',
+    'gb231280': 'gbk',
+    'gbk': 'gbk',
+    'greek': 'iso8859-7',
+    'greek8': 'iso8859-7',
+    'hebrew': 'iso8859-8',
+    'hproman8': 'hp-roman8',
+    'hzgb2312': 'hz',
+    'ibm037': 'cp037',
+    'ibm1026': 'cp1026',
+    'ibm367': 'ascii',
+    'ibm424': 'cp424',
+    'ibm437': 'cp437',
+    'ibm500': 'cp500',
+    'ibm775': 'cp775',
+    'ibm819': 'windows-1252',
+    'ibm850': 'cp850',
+    'ibm852': 'cp852',
+    'ibm855': 'cp855',
+    'ibm857': 'cp857',
+    'ibm860': 'cp860',
+    'ibm861': 'cp861',
+    'ibm862': 'cp862',
+    'ibm863': 'cp863',
+    'ibm864': 'cp864',
+    'ibm865': 'cp865',
+    'ibm866': 'cp866',
+    'ibm869': 'cp869',
+    'iso2022jp': 'iso2022_jp',
+    'iso2022jp2': 'iso2022_jp_2',
+    'iso2022kr': 'iso2022_kr',
+    'iso646irv1991': 'ascii',
+    'iso646us': 'ascii',
+    'iso88591': 'windows-1252',
+    'iso885910': 'iso8859-10',
+    'iso8859101992': 'iso8859-10',
+    'iso885911987': 'windows-1252',
+    'iso885913': 'iso8859-13',
+    'iso885914': 'iso8859-14',
+    'iso8859141998': 'iso8859-14',
+    'iso885915': 'iso8859-15',
+    'iso885916': 'iso8859-16',
+    'iso8859162001': 'iso8859-16',
+    'iso88592': 'iso8859-2',
+    'iso885921987': 'iso8859-2',
+    'iso88593': 'iso8859-3',
+    'iso885931988': 'iso8859-3',
+    'iso88594': 'iso8859-4',
+    'iso885941988': 'iso8859-4',
+    'iso88595': 'iso8859-5',
+    'iso885951988': 'iso8859-5',
+    'iso88596': 'iso8859-6',
+    'iso885961987': 'iso8859-6',
+    'iso88597': 'iso8859-7',
+    'iso885971987': 'iso8859-7',
+    'iso88598': 'iso8859-8',
+    'iso885981988': 'iso8859-8',
+    'iso88599': 'windows-1254',
+    'iso885991989': 'windows-1254',
+    'isoceltic': 'iso8859-14',
+    'isoir100': 'windows-1252',
+    'isoir101': 'iso8859-2',
+    'isoir109': 'iso8859-3',
+    'isoir110': 'iso8859-4',
+    'isoir126': 'iso8859-7',
+    'isoir127': 'iso8859-6',
+    'isoir138': 'iso8859-8',
+    'isoir144': 'iso8859-5',
+    'isoir148': 'windows-1254',
+    'isoir149': 'cp949',
+    'isoir157': 'iso8859-10',
+    'isoir199': 'iso8859-14',
+    'isoir226': 'iso8859-16',
+    'isoir58': 'gbk',
+    'isoir6': 'ascii',
+    'koi8r': 'koi8-r',
+    'koi8u': 'koi8-u',
+    'korean': 'cp949',
+    'ksc5601': 'cp949',
+    'ksc56011987': 'cp949',
+    'ksc56011989': 'cp949',
+    'l1': 'windows-1252',
+    'l10': 'iso8859-16',
+    'l2': 'iso8859-2',
+    'l3': 'iso8859-3',
+    'l4': 'iso8859-4',
+    'l5': 'windows-1254',
+    'l6': 'iso8859-10',
+    'l8': 'iso8859-14',
+    'latin1': 'windows-1252',
+    'latin10': 'iso8859-16',
+    'latin2': 'iso8859-2',
+    'latin3': 'iso8859-3',
+    'latin4': 'iso8859-4',
+    'latin5': 'windows-1254',
+    'latin6': 'iso8859-10',
+    'latin8': 'iso8859-14',
+    'latin9': 'iso8859-15',
+    'ms936': 'gbk',
+    'mskanji': 'shift_jis',
+    'pt154': 'ptcp154',
+    'ptcp154': 'ptcp154',
+    'r8': 'hp-roman8',
+    'roman8': 'hp-roman8',
+    'shiftjis': 'shift_jis',
+    'tis620': 'cp874',
+    'unicode11utf7': 'utf-7',
+    'us': 'ascii',
+    'usascii': 'ascii',
+    'utf16': 'utf-16',
+    'utf16be': 'utf-16-be',
+    'utf16le': 'utf-16-le',
+    'utf8': 'utf-8',
+    'windows1250': 'cp1250',
+    'windows1251': 'cp1251',
+    'windows1252': 'cp1252',
+    'windows1253': 'cp1253',
+    'windows1254': 'cp1254',
+    'windows1255': 'cp1255',
+    'windows1256': 'cp1256',
+    'windows1257': 'cp1257',
+    'windows1258': 'cp1258',
+    'windows936': 'gbk',
+    'x-x-big5': 'big5'}
+
+tokenTypes = {  # token type name -> integer code
+    "Doctype":0,
+    "Characters":1,
+    "SpaceCharacters":2,
+    "StartTag":3,
+    "EndTag":4,
+    "EmptyTag":5,
+    "Comment":6,
+    "ParseError":7
+}
+
+tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"], 
+                           tokenTypes["EmptyTag"]))  # codes of the three tag token types
+
+
+prefixes = dict([(v,k) for k,v in namespaces.iteritems()])  # inverse of namespaces: value -> key
+prefixes["http://www.w3.org/1998/Math/MathML"] = "math"  # always report this URI as "math"
+
+class DataLossWarning(UserWarning):
+    """UserWarning subclass; by its name, flags handling that may lose data (confirm)."""
+
+class ReparseException(Exception):
+    """Exception subclass; by its name, requests that parsing start over (confirm)."""
diff --git a/html5lib/filters/__init__.py b/html5lib/filters/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/html5lib/filters/_base.py b/html5lib/filters/_base.py
new file mode 100644
index 00000000..bca94ada
--- /dev/null
+++ b/html5lib/filters/_base.py
@@ -0,0 +1,10 @@
+
+class Filter(object):  # pass-through base class: wraps `source` and defers to it
+    def __init__(self, source):
+        self.source = source  # wrapped object; iterated and used as attribute fallback
+
+    def __iter__(self):
+        return iter(self.source)  # default: hand out the source's items unchanged
+
+    def __getattr__(self, name):
+        return getattr(self.source, name)  # only reached when normal lookup fails
diff --git a/html5lib/filters/formfiller.py b/html5lib/filters/formfiller.py
new file mode 100644
index 00000000..94001714
--- /dev/null
+++ b/html5lib/filters/formfiller.py
@@ -0,0 +1,127 @@
+#
+# The eventual goal is a form filler to which you pass the data for each
+# form, and which applies the "Seeding a form with initial values" algorithm.
+# See http://www.whatwg.org/specs/web-forms/current-work/#seeding
+#
+
+import _base
+
+from html5lib.constants import spaceCharacters
+spaceCharacters = u"".join(spaceCharacters)
+
+class SimpleFilter(_base.Filter):
+    """Token filter that fills form controls with values from `fieldStorage`
+    (any object with a getlist(name) method); attributes are expected in
+    token["data"] as a list of (name, value) pairs."""
+
+    def __init__(self, source, fieldStorage):
+        _base.Filter.__init__(self, source)
+        self.fieldStorage = fieldStorage
+
+    def _lookup(self, field_name, field_indices):
+        """Return (values, index, value): the next value for a field, or ""."""
+        values = self.fieldStorage.getlist(field_name)
+        index = field_indices.setdefault(field_name, 0)
+        if index < len(values):
+            return values, index, values[index]
+        return values, index, ""
+
+    def __iter__(self):
+        """Yield the source tokens with input/select/textarea values filled in."""
+        field_indices = {}  # field name -> index of its next unused value
+        # field_type must be bound before the loop: every token reads it, so any stream
+        # not starting with an input/textarea/select tag raised UnboundLocalError.
+        field_type = None
+        field_name = None
+        for token in _base.Filter.__iter__(self):
+            token_type = token["type"]
+            if token_type in ("StartTag", "EmptyTag"):
+                tag = token["name"].lower()
+                if tag == "input":
+                    field_name = None
+                    field_type = None
+                    input_value_index = -1
+                    input_checked_index = -1
+                    for i, (n, v) in enumerate(token["data"]):
+                        n = n.lower()
+                        if n == u"name":
+                            field_name = v.strip(spaceCharacters)
+                        elif n == u"type":
+                            field_type = v.strip(spaceCharacters)
+                        elif n == u"checked":
+                            input_checked_index = i
+                        elif n == u"value":
+                            input_value_index = i
+
+                    value_list, field_index, value = self._lookup(field_name, field_indices)
+                    if field_type in (u"checkbox", u"radio"):
+                        if value_list:
+                            if token["data"][input_value_index][1] == value:
+                                if input_checked_index < 0:
+                                    token["data"].append((u"checked", u""))
+                                field_indices[field_name] = field_index + 1
+                            elif input_checked_index >= 0:
+                                del token["data"][input_checked_index]
+
+                    elif field_type not in (u"button", u"submit", u"reset"):
+                        if input_value_index >= 0:
+                            token["data"][input_value_index] = (u"value", value)
+                        else:
+                            token["data"].append((u"value", value))
+                        field_indices[field_name] = field_index + 1
+
+                    field_type = None
+                    field_name = None
+
+                elif tag == "textarea":
+                    field_type = "textarea"
+                    field_name = dict((token["data"])[::-1])["name"]
+
+                elif tag == "select":
+                    field_type = "select"
+                    attributes = dict(token["data"][::-1])
+                    field_name = attributes.get("name")
+                    is_select_multiple = "multiple" in attributes
+                    is_selected_option_found = False
+
+                elif field_type == "select" and field_name and tag == "option":
+                    option_selected_index = -1
+                    option_value = None
+                    for i, (n, v) in enumerate(token["data"]):
+                        n = n.lower()
+                        if n == "selected":
+                            option_selected_index = i
+                        elif n == "value":
+                            option_value = v.strip(spaceCharacters)
+                    if option_value is None:
+                        raise NotImplementedError("<option>s without a value= attribute")
+                    value_list, field_index, value = self._lookup(field_name, field_indices)
+                    if value_list:
+                        if (is_select_multiple or not is_selected_option_found) and option_value == value:
+                            if option_selected_index < 0:
+                                token["data"].append((u"selected", u""))
+                            field_indices[field_name] = field_index + 1
+                            is_selected_option_found = True
+                        elif option_selected_index >= 0:
+                            del token["data"][option_selected_index]
+
+            elif field_type is not None and field_name and token_type == "EndTag":
+                tag = token["name"].lower()
+                if tag == field_type:
+                    if tag == "textarea":
+                        value_list, field_index, value = self._lookup(field_name, field_indices)
+                        if value_list:
+                            yield {"type": "Characters", "data": value}
+                            field_indices[field_name] = field_index + 1
+                    # Clear field_type too: left at "textarea" it made the branch
+                    # below drop every later non-start-tag token.
+                    field_type = None
+                    field_name = None
+
+                elif tag == "option" and field_type == "select":
+                    pass # TODO: part of "option without value= attribute" processing
+
+            elif field_type == "textarea":
+                continue # ignore token
+
+            yield token
diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py
new file mode 100644
index 00000000..8e04d8ac
--- /dev/null
+++ b/html5lib/filters/inject_meta_charset.py
@@ -0,0 +1,62 @@
+import _base
+
+class Filter(_base.Filter):
+    def __init__(self, source, encoding):
+        _base.Filter.__init__(self, source)
+        self.encoding = encoding
+
+    def __iter__(self):
+        state = "pre_head"
+        meta_found = (self.encoding is None)
+        pending = []
+
+        for token in _base.Filter.__iter__(self):
+            type = token["type"]
+            if type == "StartTag":
+                if token["name"].lower() == u"head":
+                    state = "in_head"
+
+            elif type == "EmptyTag":
+                if token["name"].lower() == u"meta":
+                   # replace charset with actual encoding
+                   has_http_equiv_content_type = False
+                   for (namespace,name),value in token["data"].iteritems():
+                       if namespace != None:
+                           continue
+                       elif name.lower() == u'charset':
+                          token["data"][(namespace,name)] = self.encoding
+                          meta_found = True
+                          break
+                       elif name == u'http-equiv' and value.lower() == u'content-type':
+                           has_http_equiv_content_type = True
+                   else:
+                       if has_http_equiv_content_type and (None, u"content") in token["data"]:
+                           token["data"][(None, u"content")] = u'text/html; charset=%s' % self.encoding
+                           meta_found = True
+
+                elif token["name"].lower() == u"head" and not meta_found:
+                    # insert meta into empty head
+                    yield {"type": "StartTag", "name": u"head",
+                           "data": token["data"]}
+                    yield {"type": "EmptyTag", "name": u"meta",
+                           "data": {(None, u"charset"): self.encoding}}
+                    yield {"type": "EndTag", "name": u"head"}
+                    meta_found = True
+                    continue
+
+            elif type == "EndTag":
+                if token["name"].lower() == u"head" and pending:
+                    # insert meta into head (if necessary) and flush pending queue
+                    yield pending.pop(0)
+                    if not meta_found:
+                        yield {"type": "EmptyTag", "name": u"meta",
+                               "data": {(None, u"charset"): self.encoding}}
+                    while pending:
+                        yield pending.pop(0)
+                    meta_found = True
+                    state = "post_head"
+
+            if state == "in_head":
+                pending.append(token)
+            else:
+                yield token
diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py
new file mode 100644
index 00000000..ea5c619f
--- /dev/null
+++ b/html5lib/filters/lint.py
@@ -0,0 +1,88 @@
+from gettext import gettext
+_ = gettext
+
+import _base
+from html5lib.constants import cdataElements, rcdataElements, voidElements
+
+from html5lib.constants import spaceCharacters
+spaceCharacters = u"".join(spaceCharacters)
+
+class LintError(Exception): pass
+
+class Filter(_base.Filter):
+    def __iter__(self):
+        open_elements = []
+        contentModelFlag = "PCDATA"
+        for token in _base.Filter.__iter__(self):
+            type = token["type"]
+            if type in ("StartTag", "EmptyTag"):
+                name = token["name"]
+                if contentModelFlag != "PCDATA":
+                    raise LintError(_("StartTag not in PCDATA content model flag: %s") % name)
+                if not isinstance(name, unicode):
+                    raise LintError(_(u"Tag name is not a string: %r") % name)
+                if not name:
+                    raise LintError(_(u"Empty tag name"))
+                if type == "StartTag" and name in voidElements:
+                    raise LintError(_(u"Void element reported as StartTag token: %s") % name)
+                elif type == "EmptyTag" and name not in voidElements:
+                    raise LintError(_(u"Non-void element reported as EmptyTag token: %s") % token["name"])
+                if type == "StartTag":
+                    open_elements.append(name)
+                for attr_name, attr_value in token["data"]:  # must not clobber the tag `name`
+                    if not isinstance(attr_name, unicode):
+                        raise LintError(_("Attribute name is not a string: %r") % attr_name)
+                    if not attr_name:
+                        raise LintError(_(u"Empty attribute name"))
+                    if not isinstance(attr_value, unicode):
+                        raise LintError(_("Attribute value is not a string: %r") % attr_value)
+                if name in cdataElements:
+                    contentModelFlag = "CDATA"
+                elif name in rcdataElements:
+                    contentModelFlag = "RCDATA"
+                elif name == "plaintext":
+                    contentModelFlag = "PLAINTEXT"
+
+            elif type == "EndTag":
+                name = token["name"]
+                if not isinstance(name, unicode):
+                    raise LintError(_(u"Tag name is not a string: %r") % name)
+                if not name:
+                    raise LintError(_(u"Empty tag name"))
+                if name in voidElements:
+                    raise LintError(_(u"Void element reported as EndTag token: %s") % name)
+                start_name = open_elements.pop()
+                if start_name != name:
+                    raise LintError(_(u"EndTag (%s) does not match StartTag (%s)") % (name, start_name))
+                contentModelFlag = "PCDATA"
+
+            elif type == "Comment":
+                if contentModelFlag != "PCDATA":
+                    raise LintError(_("Comment not in PCDATA content model flag"))
+
+            elif type in ("Characters", "SpaceCharacters"):
+                data = token["data"]
+                if not isinstance(data, unicode):
+                    raise LintError(_("Character data is not a string: %r") % data)
+                if not data:
+                    raise LintError(_(u"%s token with empty data") % type)
+                if type == "SpaceCharacters":
+                    data = data.strip(spaceCharacters)
+                    if data:
+                        raise LintError(_(u"Non-space character(s) found in SpaceCharacters token: %r") % data)
+
+            elif type == "Doctype":
+                name = token["name"]
+                if contentModelFlag != "PCDATA":
+                    raise LintError(_("Doctype not in PCDATA content model flag: %s") % name)
+                if not isinstance(name, unicode):
+                    raise LintError(_(u"Tag name is not a string: %r") % name)
+                # XXX: what to do with token["data"] ?
+
+            elif type in ("ParseError", "SerializeError"):
+                pass
+
+            else:
+                raise LintError(_(u"Unknown token type: %s") % type)
+
+            yield token
diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py
new file mode 100644
index 00000000..a77aa72c
--- /dev/null
+++ b/html5lib/filters/optionaltags.py
@@ -0,0 +1,202 @@
+import _base
+
+class Filter(_base.Filter):
+    def slider(self):
+        previous1 = previous2 = None
+        for token in self.source:
+            if previous1 is not None:
+                yield previous2, previous1, token
+            previous2, previous1 = previous1, token
+        if previous1 is not None:  # empty source: there is no last token to flush
+            yield previous2, previous1, None
+
+    def __iter__(self):
+        for previous, token, next in self.slider():
+            type = token["type"]
+            if type == "StartTag":
+                if (token["data"] or 
+                    not self.is_optional_start(token["name"], previous, next)):
+                    yield token
+            elif type == "EndTag":
+                if not self.is_optional_end(token["name"], next):
+                    yield token
+            else:
+                yield token
+
+    def is_optional_start(self, tagname, previous, next):
+        type = next and next["type"] or None
+        if tagname == 'html':
+            # An html element's start tag may be omitted if the first thing
+            # inside the html element is not a space character or a comment.
+            return type not in ("Comment", "SpaceCharacters")
+        elif tagname == 'head':
+            # A head element's start tag may be omitted if the first thing
+            # inside the head element is an element.
+            # XXX: we also omit the start tag if the head element is empty
+            if type in ("StartTag", "EmptyTag"):
+                return True
+            elif type == "EndTag":
+                return next["name"] == "head"
+        elif tagname == 'body':
+            # A body element's start tag may be omitted if the first thing
+            # inside the body element is not a space character or a comment,
+            # except if the first thing inside the body element is a script
+            # or style element and the node immediately preceding the body
+            # element is a head element whose end tag has been omitted.
+            if type in ("Comment", "SpaceCharacters"):
+                return False
+            elif type == "StartTag":
+                # XXX: we do not look at the preceding event, so we never omit
+                # the body element's start tag if it's followed by a script or
+                # a style element.
+                return next["name"] not in ('script', 'style')
+            else:
+                return True
+        elif tagname == 'colgroup':
+            # A colgroup element's start tag may be omitted if the first thing
+            # inside the colgroup element is a col element, and if the element
+            # is not immediately preceded by another colgroup element whose
+            # end tag has been omitted.
+            if type in ("StartTag", "EmptyTag"):
+                # XXX: we do not look at the preceding event, so instead we never
+                # omit the colgroup element's end tag when it is immediately
+                # followed by another colgroup element. See is_optional_end.
+                return next["name"] == "col"
+            else:
+                return False
+        elif tagname == 'tbody':
+            # A tbody element's start tag may be omitted if the first thing
+            # inside the tbody element is a tr element, and if the element is
+            # not immediately preceded by a tbody, thead, or tfoot element
+            # whose end tag has been omitted.
+            if type == "StartTag":
+                # omit the thead and tfoot elements' end tag when they are
+                # immediately followed by a tbody element. See is_optional_end.
+                if previous and previous['type'] == 'EndTag' and \
+                  previous['name'] in ('tbody','thead','tfoot'):
+                    return False
+                return next["name"] == 'tr'
+            else:
+                return False
+        return False
+
+    def is_optional_end(self, tagname, next):
+        type = next and next["type"] or None
+        if tagname in ('html', 'head', 'body'):
+            # An html element's end tag may be omitted if the html element
+            # is not immediately followed by a space character or a comment.
+            return type not in ("Comment", "SpaceCharacters")
+        elif tagname in ('li', 'optgroup', 'tr'):
+            # A li element's end tag may be omitted if the li element is
+            # immediately followed by another li element or if there is
+            # no more content in the parent element.
+            # An optgroup element's end tag may be omitted if the optgroup
+            # element is immediately followed by another optgroup element,
+            # or if there is no more content in the parent element.
+            # A tr element's end tag may be omitted if the tr element is
+            # immediately followed by another tr element, or if there is
+            # no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] == tagname
+            else:
+                return type == "EndTag" or type is None
+        elif tagname in ('dt', 'dd'):
+            # A dt element's end tag may be omitted if the dt element is
+            # immediately followed by another dt element or a dd element.
+            # A dd element's end tag may be omitted if the dd element is
+            # immediately followed by another dd element or a dt element,
+            # or if there is no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] in ('dt', 'dd')
+            elif tagname == 'dd':
+                return type == "EndTag" or type is None
+            else:
+                return False
+        elif tagname == 'p':
+            # A p element's end tag may be omitted if the p element is
+            # immediately followed by an address, article, aside,
+            # blockquote, datagrid, dialog, dir, div, dl, fieldset,
+            # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu,
+            # nav, ol, p, pre, section, table, or ul, element, or if
+            # there is no more content in the parent element.
+            if type in ("StartTag", "EmptyTag"):
+                return next["name"] in ('address', 'article', 'aside',
+                                        'blockquote', 'datagrid', 'dialog', 
+                                        'dir', 'div', 'dl', 'fieldset', 'footer',
+                                        'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+                                        'header', 'hr', 'menu', 'nav', 'ol', 
+                                        'p', 'pre', 'section', 'table', 'ul')
+            else:
+                return type == "EndTag" or type is None
+        elif tagname == 'option':
+            # An option element's end tag may be omitted if the option
+            # element is immediately followed by another option element,
+            # or if it is immediately followed by an <code>optgroup</code>
+            # element, or if there is no more content in the parent
+            # element.
+            if type == "StartTag":
+                return next["name"] in ('option', 'optgroup')
+            else:
+                return type == "EndTag" or type is None
+        elif tagname in ('rt', 'rp'):
+            # An rt element's end tag may be omitted if the rt element is
+            # immediately followed by an rt or rp element, or if there is
+            # no more content in the parent element.
+            # An rp element's end tag may be omitted if the rp element is
+            # immediately followed by an rt or rp element, or if there is
+            # no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] in ('rt', 'rp')
+            else:
+                return type == "EndTag" or type is None
+        elif tagname == 'colgroup':
+            # A colgroup element's end tag may be omitted if the colgroup
+            # element is not immediately followed by a space character or
+            # a comment.
+            if type in ("Comment", "SpaceCharacters"):
+                return False
+            elif type == "StartTag":
+                # XXX: we also look for an immediately following colgroup
+                # element. See is_optional_start.
+                return next["name"] != 'colgroup'
+            else:
+                return True
+        elif tagname in ('thead', 'tbody'):
+            # A thead element's end tag may be omitted if the thead element
+            # is immediately followed by a tbody or tfoot element.
+            # A tbody element's end tag may be omitted if the tbody element
+            # is immediately followed by a tbody or tfoot element, or if
+            # there is no more content in the parent element.
+            # A tfoot element's end tag may be omitted if the tfoot element
+            # is immediately followed by a tbody element, or if there is no
+            # more content in the parent element.
+            # XXX: we never omit the end tag when the following element is
+            # a tbody. See is_optional_start.
+            if type == "StartTag":
+                return next["name"] in ['tbody', 'tfoot']
+            elif tagname == 'tbody':
+                return type == "EndTag" or type is None
+            else:
+                return False
+        elif tagname == 'tfoot':
+            # A tfoot element's end tag may be omitted if the tfoot element
+            # is immediately followed by a tbody element, or if there is no
+            # more content in the parent element.
+            # XXX: we never omit the end tag when the following element is
+            # a tbody. See is_optional_start.
+            if type == "StartTag":
+                return next["name"] == 'tbody'
+            else:
+                return type == "EndTag" or type is None
+        elif tagname in ('td', 'th'):
+            # A td element's end tag may be omitted if the td element is
+            # immediately followed by a td or th element, or if there is
+            # no more content in the parent element.
+            # A th element's end tag may be omitted if the th element is
+            # immediately followed by a td or th element, or if there is
+            # no more content in the parent element.
+            if type == "StartTag":
+                return next["name"] in ('td', 'th')
+            else:
+                return type == "EndTag" or type is None
+        return False
diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py
new file mode 100644
index 00000000..00235278
--- /dev/null
+++ b/html5lib/filters/sanitizer.py
@@ -0,0 +1,8 @@
+import _base
+from html5lib.sanitizer import HTMLSanitizerMixin
+
+class Filter(_base.Filter, HTMLSanitizerMixin):
+    def __iter__(self):
+        for token in _base.Filter.__iter__(self):
+            token = self.sanitize_token(token)
+            if token: yield token
diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py
new file mode 100644
index 00000000..74d6f4d8
--- /dev/null
+++ b/html5lib/filters/whitespace.py
@@ -0,0 +1,41 @@
+try:
+    frozenset
+except NameError:
+    # Import from the sets module for python 2.3
+    from sets import ImmutableSet as frozenset
+
+import re
+
+import _base
+from html5lib.constants import rcdataElements, spaceCharacters
+spaceCharacters = u"".join(spaceCharacters)
+
+SPACES_REGEX = re.compile(u"[%s]+" % spaceCharacters)
+
+class Filter(_base.Filter):
+
+    spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
+
+    def __iter__(self):
+        preserve = 0
+        for token in _base.Filter.__iter__(self):
+            type = token["type"]
+            if type == "StartTag" \
+              and (preserve or token["name"] in self.spacePreserveElements):
+                preserve += 1
+
+            elif type == "EndTag" and preserve:
+                preserve -= 1
+
+            elif not preserve and type == "SpaceCharacters" and token["data"]:
+                # Test on token["data"] above to not introduce spaces where there were not
+                token["data"] = u" "
+
+            elif not preserve and type == "Characters":
+                token["data"] = collapse_spaces(token["data"])
+
+            yield token
+
+def collapse_spaces(text):
+    return SPACES_REGEX.sub(' ', text)
+
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
new file mode 100644
index 00000000..08a8f8ad
--- /dev/null
+++ b/html5lib/html5parser.py
@@ -0,0 +1,2733 @@
+try:
+    frozenset
+except NameError:
+    # Import from the sets module for python 2.3
+    from sets import Set as set
+    from sets import ImmutableSet as frozenset
+
+try:
+    any
+except:
+    # Implement 'any' for python 2.4 and previous
+    def any(iterable):
+        for element in iterable:
+            if element:
+                return True
+        return False
+        
+try:
+    "abc".startswith(("a", "b"))
+    def startswithany(str, prefixes):
+        return str.startswith(prefixes)
+except:
+    # Python 2.4 doesn't accept a tuple as argument to string startswith
+    def startswithany(str, prefixes):
+        for prefix in prefixes:
+            if str.startswith(prefix):
+                return True
+        return False
+
+import sys
+import types
+
+import inputstream
+import tokenizer
+
+import treebuilders
+from treebuilders._base import Marker
+from treebuilders import simpletree
+
+import utils
+import constants
+from constants import spaceCharacters, asciiUpper2Lower
+from constants import formattingElements, specialElements
+from constants import headingElements, tableInsertModeElements
+from constants import cdataElements, rcdataElements, voidElements
+from constants import tokenTypes, ReparseException, namespaces, spaceCharacters
+from constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
+
+def parse(doc, treebuilder="simpletree", encoding=None,
+          namespaceHTMLElements=True):
+    """Parse a string or file-like object into a tree"""
+    tb = treebuilders.getTreeBuilder(treebuilder)
+    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
+    return p.parse(doc, encoding=encoding)
+
+def parseFragment(doc, container="div", treebuilder="simpletree", encoding=None, 
+                  namespaceHTMLElements=True):
+    tb = treebuilders.getTreeBuilder(treebuilder)
+    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
+    return p.parseFragment(doc, container=container, encoding=encoding)
+
+def method_decorator_metaclass(function):
+    class Decorated(type):
+        def __new__(meta, classname, bases, classDict):
+            for attributeName, attribute in classDict.iteritems():
+                if type(attribute) == types.FunctionType:
+                    attribute = function(attribute)
+
+                classDict[attributeName] = attribute
+            return  type.__new__(meta, classname, bases, classDict)
+    return Decorated
+
+class HTMLParser(object):
+    """HTML parser. Generates a tree structure from a stream of (possibly
+        malformed) HTML"""
+
+    def __init__(self, tree = simpletree.TreeBuilder,
+                 tokenizer = tokenizer.HTMLTokenizer, strict = False,
+                 namespaceHTMLElements = True, debug=False):
+        """
+        strict - raise an exception when a parse error is encountered
+
+        tree - a treebuilder class controlling the type of tree that will be
+        returned. Built in treebuilders can be accessed through
+        html5lib.treebuilders.getTreeBuilder(treeType)
+        
+        tokenizer - a class that provides a stream of tokens to the treebuilder.
+        This may be replaced for e.g. a sanitizer which converts some tags to
+        text
+        """
+
+        # Raise an exception on the first error encountered
+        self.strict = strict
+
+        self.tree = tree(namespaceHTMLElements)
+        self.tokenizer_class = tokenizer
+        self.errors = []
+
+        self.phases = dict([(name, cls(self, self.tree)) for name, cls in
+                            getPhases(debug).iteritems()])
+
+    def _parse(self, stream, innerHTML=False, container="div",
+               encoding=None, parseMeta=True, useChardet=True, **kwargs):
+
+        self.innerHTMLMode = innerHTML
+        self.container = container
+        self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
+                                              parseMeta=parseMeta,
+                                              useChardet=useChardet, 
+                                              parser=self, **kwargs)
+        self.reset()
+
+        while True:
+            try:
+                self.mainLoop()
+                break
+            except ReparseException, e:
+                self.reset()
+
+    def reset(self):
+        self.tree.reset()
+        self.firstStartTag = False
+        self.errors = []
+        self.log = [] #only used with debug mode
+        # "quirks" / "limited quirks" / "no quirks"
+        self.compatMode = "no quirks"
+
+        if self.innerHTMLMode:
+            self.innerHTML = self.container.lower()
+
+            if self.innerHTML in cdataElements:
+                self.tokenizer.state = self.tokenizer.rcdataState
+            elif self.innerHTML in rcdataElements:
+                self.tokenizer.state = self.tokenizer.rawtextState
+            elif self.innerHTML == 'plaintext':
+                self.tokenizer.state = self.tokenizer.plaintextState
+            else:
+                # state already is data state
+                # self.tokenizer.state = self.tokenizer.dataState
+                pass
+            self.phase = self.phases["beforeHtml"]
+            self.phase.insertHtmlElement()
+            self.resetInsertionMode()
+        else:
+            self.innerHTML = False
+            self.phase = self.phases["initial"]
+
+        self.lastPhase = None
+
+        self.beforeRCDataPhase = None
+
+        self.framesetOK = True
+
+    def isHTMLIntegrationPoint(self, element):
+        if (element.name == "annotation-xml" and 
+            element.namespace == namespaces["mathml"]):
+            return ("encoding" in element.attributes and
+                    element.attributes["encoding"].translate(
+                        asciiUpper2Lower) in 
+                    ("text/html", "application/xhtml+xml"))
+        else:
+            return (element.namespace, element.name) in htmlIntegrationPointElements
+
+    def isMathMLTextIntegrationPoint(self, element):
+        return (element.namespace, element.name) in mathmlTextIntegrationPointElements
+        
+    def mainLoop(self):
+        CharactersToken = tokenTypes["Characters"]
+        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
+        StartTagToken = tokenTypes["StartTag"]
+        EndTagToken = tokenTypes["EndTag"]
+        CommentToken = tokenTypes["Comment"]
+        DoctypeToken = tokenTypes["Doctype"]
+        ParseErrorToken = tokenTypes["ParseError"]
+
+        for token in self.normalizedTokens():
+            new_token = token
+            while new_token is not None:
+                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
+                currentNodeNamespace = currentNode.namespace if currentNode else None
+                currentNodeName = currentNode.name if currentNode else None
+
+                type = new_token["type"]
+
+                if type == ParseErrorToken:
+                    self.parseError(new_token["data"], new_token.get("datavars", {}))
+                    new_token = None
+                else:
+                    if (len(self.tree.openElements) == 0 or
+                        currentNodeNamespace == self.tree.defaultNamespace or
+                        (self.isMathMLTextIntegrationPoint(currentNode) and
+                         ((type == StartTagToken and
+                           token["name"] not in frozenset(["mglyph", "malignmark"])) or
+                         type in (CharactersToken, SpaceCharactersToken))) or
+                        (currentNodeNamespace == namespaces["mathml"] and
+                         currentNodeName == "annotation-xml" and
+                         type == StartTagToken and token["name"] == "svg") or  # only tags have a name
+                        (self.isHTMLIntegrationPoint(currentNode) and
+                         type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
+                        phase = self.phase
+                    else:
+                        phase = self.phases["inForeignContent"]
+
+                    if type == CharactersToken:
+                        new_token = phase.processCharacters(new_token)
+                    elif type == SpaceCharactersToken:
+                        new_token = phase.processSpaceCharacters(new_token)
+                    elif type == StartTagToken:
+                        new_token = phase.processStartTag(new_token)
+                    elif type == EndTagToken:
+                        new_token = phase.processEndTag(new_token)
+                    elif type == CommentToken:
+                        new_token = phase.processComment(new_token)
+                    elif type == DoctypeToken:
+                        new_token = phase.processDoctype(new_token)
+
+            if (type == StartTagToken and token["selfClosing"]
+                and not token["selfClosingAcknowledged"]):
+                self.parseError("non-void-element-with-trailing-solidus",
+                                {"name":token["name"]})
+
+
+        # When the loop finishes it's EOF
+        reprocess = True
+        phases = []
+        while reprocess:
+            phases.append(self.phase)
+            reprocess = self.phase.processEOF()
+            if reprocess:
+                assert self.phase not in phases
+
+    def normalizedTokens(self):
+        for token in self.tokenizer:
+            yield self.normalizeToken(token)
+
+    def parse(self, stream, encoding=None, parseMeta=True, useChardet=True):
+        """Parse a HTML document into a well-formed tree
+
+        stream - a filelike object or string containing the HTML to be parsed
+
+        The optional encoding parameter must be a string that indicates
+        the encoding.  If specified, that encoding will be used,
+        regardless of any BOM or later declaration (such as in a meta
+        element)
+        """
+        self._parse(stream, innerHTML=False, encoding=encoding, 
+                    parseMeta=parseMeta, useChardet=useChardet)
+        return self.tree.getDocument()
+    
+    def parseFragment(self, stream, container="div", encoding=None,
+                      parseMeta=False, useChardet=True):
+        """Parse a HTML fragment into a well-formed tree fragment
+        
+        container - name of the element we're setting the innerHTML property
+        if set to None, default to 'div'
+
+        stream - a filelike object or string containing the HTML to be parsed
+
+        The optional encoding parameter must be a string that indicates
+        the encoding.  If specified, that encoding will be used,
+        regardless of any BOM or later declaration (such as in a meta
+        element)
+        """
+        self._parse(stream, True, container=container, encoding=encoding)
+        return self.tree.getFragment()
+
+    def parseError(self, errorcode="XXX-undefined-error", datavars={}):
+        # XXX The idea is to make errorcode mandatory.
+        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
+        if self.strict:
+            raise ParseError
+
+    def normalizeToken(self, token):
+        """ HTML5 specific normalizations to the token stream """
+
+        if token["type"] == tokenTypes["StartTag"]:
+            token["data"] = dict(token["data"][::-1])
+
+        return token
+
+    def adjustMathMLAttributes(self, token):
+        replacements = {"definitionurl":u"definitionURL"}
+        for k,v in replacements.iteritems():
+            if k in token["data"]:
+                token["data"][v] = token["data"][k]
+                del token["data"][k]
+
+    def adjustSVGAttributes(self, token):
+        """Re-key lowercased SVG attribute names in token["data"] to their mixed-case form, in place."""
+        replacements = {
+            "attributename": u"attributeName",
+            "attributetype": u"attributeType",
+            "basefrequency": u"baseFrequency",
+            "baseprofile": u"baseProfile",
+            "calcmode": u"calcMode",
+            "clippathunits": u"clipPathUnits",
+            "contentscripttype": u"contentScriptType",
+            "contentstyletype": u"contentStyleType",
+            "diffuseconstant": u"diffuseConstant",
+            "edgemode": u"edgeMode",
+            "externalresourcesrequired": u"externalResourcesRequired",
+            "filterres": u"filterRes",
+            "filterunits": u"filterUnits",
+            "glyphref": u"glyphRef",
+            "gradienttransform": u"gradientTransform",
+            "gradientunits": u"gradientUnits",
+            "kernelmatrix": u"kernelMatrix",
+            "kernelunitlength": u"kernelUnitLength",
+            "keypoints": u"keyPoints",
+            "keysplines": u"keySplines",
+            "keytimes": u"keyTimes",
+            "lengthadjust": u"lengthAdjust",
+            "limitingconeangle": u"limitingConeAngle",
+            "markerheight": u"markerHeight",
+            "markerunits": u"markerUnits",
+            "markerwidth": u"markerWidth",
+            "maskcontentunits": u"maskContentUnits",
+            "maskunits": u"maskUnits",
+            "numoctaves": u"numOctaves",
+            "pathlength": u"pathLength",
+            "patterncontentunits": u"patternContentUnits",
+            "patterntransform": u"patternTransform",
+            "patternunits": u"patternUnits",
+            "pointsatx": u"pointsAtX",
+            "pointsaty": u"pointsAtY",
+            "pointsatz": u"pointsAtZ",
+            "preservealpha": u"preserveAlpha",
+            "preserveaspectratio": u"preserveAspectRatio",
+            "primitiveunits": u"primitiveUnits",
+            "refx": u"refX",
+            "refy": u"refY",
+            "repeatcount": u"repeatCount",
+            "repeatdur": u"repeatDur",
+            "requiredextensions": u"requiredExtensions",
+            "requiredfeatures": u"requiredFeatures",
+            "specularconstant": u"specularConstant",
+            "specularexponent": u"specularExponent",
+            "spreadmethod": u"spreadMethod",
+            "startoffset": u"startOffset",
+            "stddeviation": u"stdDeviation",
+            "stitchtiles": u"stitchTiles",
+            "surfacescale": u"surfaceScale",
+            "systemlanguage": u"systemLanguage",
+            "tablevalues": u"tableValues",
+            "targetx": u"targetX",
+            "targety": u"targetY",
+            "textlength": u"textLength",
+            "viewbox": u"viewBox",
+            "viewtarget": u"viewTarget",
+            "xchannelselector": u"xChannelSelector",
+            "ychannelselector": u"yChannelSelector",
+            "zoomandpan": u"zoomAndPan",
+            }
+        attributes = token["data"]
+        for lowered in attributes.keys():
+            if lowered in replacements:
+                attributes[replacements[lowered]] = attributes.pop(lowered)
+
+    def adjustForeignAttributes(self, token):
+        """Re-key namespaced attributes of token["data"] as (prefix, localName, namespace) tuples, in place."""
+        replacements = {
+            "xlink:actuate":("xlink", "actuate", namespaces["xlink"]),
+            "xlink:arcrole":("xlink", "arcrole", namespaces["xlink"]),
+            "xlink:href":("xlink", "href", namespaces["xlink"]),
+            "xlink:role":("xlink", "role", namespaces["xlink"]),
+            "xlink:show":("xlink", "show", namespaces["xlink"]),
+            "xlink:title":("xlink", "title", namespaces["xlink"]),
+            "xlink:type":("xlink", "type", namespaces["xlink"]),
+            "xml:base":("xml", "base", namespaces["xml"]),
+            "xml:lang":("xml", "lang", namespaces["xml"]),
+            "xml:space":("xml", "space", namespaces["xml"]),
+            "xmlns":(None, "xmlns", namespaces["xmlns"]),
+            "xmlns:xlink":("xmlns", "xlink", namespaces["xmlns"])
+            }
+        for originalName in list(token["data"]):  # snapshot of the keys: the dict is mutated inside the loop
+            if originalName in replacements:
+                foreignName = replacements[originalName]
+                token["data"][foreignName] = token["data"][originalName]
+                del token["data"][originalName]
+
+    def reparseTokenNormal(self, token):  # NOTE(review): token is not used by this method
+        self.parser.phase()  # calls whatever self.parser.phase currently is -- TODO confirm self.parser exists on this class
+
+    def resetInsertionMode(self):
+        # Choose self.phase from the open-element stack, innermost element first.
+        # (The method name is mostly historical; the specification uses it too.)
+        last = False
+        newModes = {  # element name -> key into self.phases
+            "select":"inSelect",
+            "td":"inCell",
+            "th":"inCell",
+            "tr":"inRow",
+            "tbody":"inTableBody",
+            "thead":"inTableBody",
+            "tfoot":"inTableBody",
+            "caption":"inCaption",
+            "colgroup":"inColumnGroup",
+            "table":"inTable",
+            "head":"inBody",  # note: "head" maps to inBody here, not inHead
+            "body":"inBody",
+            "frameset":"inFrameset",
+            "html":"beforeHead"
+        }
+        for node in self.tree.openElements[::-1]:
+            nodeName = node.name
+            new_phase = None
+            if node == self.tree.openElements[0]:
+                # Bottom of the stack: only expected when self.innerHTML is set;
+                assert self.innerHTML
+                last = True
+                nodeName = self.innerHTML  # the innerHTML value stands in for the node name
+            # Check for conditions that should only happen in the innerHTML
+            # case
+            if nodeName in ("select", "colgroup", "head", "html"):
+                assert self.innerHTML
+
+            if not last and node.namespace != self.tree.defaultNamespace:
+                continue  # nodes outside the default namespace are skipped
+
+            if nodeName in newModes:
+                new_phase = self.phases[newModes[nodeName]]
+                break
+            elif last:
+                new_phase = self.phases["inBody"]  # fallback once the bottom node is reached
+                break
+
+        self.phase = new_phase  # NOTE(review): unbound (NameError) if openElements is empty
+
+    def parseRCDataRawtext(self, token, contentType):
+        """Generic RCDATA/RAWTEXT parsing algorithm.
+
+        contentType - "RCDATA" or "RAWTEXT"; selects the tokenizer state.
+        The element for token is inserted, the current phase is remembered in
+        self.originalPhase and the "text" phase becomes active.
+        """
+        assert contentType in ("RAWTEXT", "RCDATA")
+
+        self.tree.insertElement(token)
+        tokenizer = self.tokenizer
+        tokenizer.state = (tokenizer.rawtextState if contentType == "RAWTEXT"
+                           else tokenizer.rcdataState)
+
+        self.originalPhase = self.phase
+        self.phase = self.phases["text"]
+
+def getPhases(debug):
+    def log(function):
+        """Decorator recording which phase processes each token in parser.log.
+
+        Only "process*" methods called with at least one positional argument
+        (the token) are logged; every call is forwarded unchanged.
+        """
+        type_names = dict((number, label) for label, number in
+                          constants.tokenTypes.iteritems())
+        def wrapped(self, *args, **kwargs):
+            if function.__name__.startswith("process") and len(args) > 0:
+                token = args[0]
+                info = {"type":type_names[token['type']]}
+                if token['type'] in constants.tagTokenTypes:
+                    info["name"] = token['name']
+                parser = self.parser
+                entry = (parser.tokenizer.state.__name__,
+                         parser.phase.__class__.__name__,
+                         self.__class__.__name__,
+                         function.__name__,
+                         info)
+                parser.log.append(entry)
+            return function(self, *args, **kwargs)
+        return wrapped
+
+    def getMetaclass(use_metaclass, metaclass_func):
+        """Return method_decorator_metaclass(metaclass_func) if use_metaclass is truthy, else plain type."""
+        if not use_metaclass:
+            return type
+        return method_decorator_metaclass(metaclass_func)
+
+    class Phase(object):
+        """Base class for the helper objects implementing each parsing phase.
+
+        processStartTag/processEndTag dispatch through self.startTagHandler and
+        self.endTagHandler, which this base class itself does not define.
+        """
+        # Suggested method order (any may be omitted): EOF, Comment, Doctype,
+        # SpaceCharacters, Characters, StartTag (then the startTag* methods),
+        # EndTag (then the endTag* methods).
+
+        __metaclass__ = getMetaclass(debug, log)
+
+        def __init__(self, parser, tree):
+            self.parser, self.tree = parser, tree
+
+        def processEOF(self):
+            raise NotImplementedError
+
+        def processComment(self, token):
+            # Correct for most phases; the others override this method.
+            currentNode = self.tree.openElements[-1]
+            self.tree.insertComment(token, currentNode)
+
+        def processDoctype(self, token):
+            self.parser.parseError("unexpected-doctype")
+
+        def processCharacters(self, token):
+            text = token["data"]
+            self.tree.insertText(text)
+
+        def processSpaceCharacters(self, token):
+            whitespace = token["data"]
+            self.tree.insertText(whitespace)
+
+        def processStartTag(self, token):
+            handler = self.startTagHandler[token["name"]]
+            return handler(token)
+
+        def startTagHtml(self, token):
+            if self.parser.firstStartTag == False and token["name"] == "html":
+                self.parser.parseError("non-html-root")
+            # XXX Need a check here to see if the first start tag token emitted
+            # is this token... If it's not, invoke self.parser.parseError().
+            for name, value in token["data"].iteritems():
+                rootAttributes = self.tree.openElements[0].attributes
+                if name not in rootAttributes:  # attributes already on the root are kept
+                    rootAttributes[name] = value
+            self.parser.firstStartTag = False
+
+        def processEndTag(self, token):
+            handler = self.endTagHandler[token["name"]]
+            return handler(token)
+
+    class InitialPhase(Phase):  # handles tokens until a doctype (or anything else) moves the parser to "beforeHtml"
+        def processSpaceCharacters(self, token):
+            pass  # whitespace is ignored in this phase
+
+        def processComment(self, token):
+            self.tree.insertComment(token, self.tree.document)  # comments attach to the document itself
+
+        def processDoctype(self, token):  # insert the doctype, pick parser.compatMode, switch to "beforeHtml"
+            name = token["name"]
+            publicId = token["publicId"]
+            systemId = token["systemId"]
+            correct = token["correct"]
+            # "and" binds tighter than "or": a system id is tolerated only when it is "about:legacy-compat".
+            if (name != "html" or publicId != None or
+                systemId != None and systemId != "about:legacy-compat"):
+                self.parser.parseError("unknown-doctype")
+
+            if publicId is None:
+                publicId = ""
+
+            self.tree.insertDoctype(token)
+
+            if publicId != "":
+                publicId = publicId.translate(asciiUpper2Lower)  # presumably lower-cases ASCII letters -- TODO confirm
+
+            if (not correct or token["name"] != "html"
+                or startswithany(publicId,
+                ("+//silmaril//dtd html pro v0r11 19970101//",
+                 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
+                 "-//as//dtd html 3.0 aswedit + extensions//",
+                 "-//ietf//dtd html 2.0 level 1//",
+                 "-//ietf//dtd html 2.0 level 2//",
+                 "-//ietf//dtd html 2.0 strict level 1//",
+                 "-//ietf//dtd html 2.0 strict level 2//",
+                 "-//ietf//dtd html 2.0 strict//",
+                 "-//ietf//dtd html 2.0//",
+                 "-//ietf//dtd html 2.1e//",
+                 "-//ietf//dtd html 3.0//",
+                 "-//ietf//dtd html 3.2 final//",
+                 "-//ietf//dtd html 3.2//",
+                 "-//ietf//dtd html 3//",
+                 "-//ietf//dtd html level 0//",
+                 "-//ietf//dtd html level 1//",
+                 "-//ietf//dtd html level 2//",
+                 "-//ietf//dtd html level 3//",
+                 "-//ietf//dtd html strict level 0//",
+                 "-//ietf//dtd html strict level 1//",
+                 "-//ietf//dtd html strict level 2//",
+                 "-//ietf//dtd html strict level 3//",
+                 "-//ietf//dtd html strict//",
+                 "-//ietf//dtd html//",
+                 "-//metrius//dtd metrius presentational//",
+                 "-//microsoft//dtd internet explorer 2.0 html strict//",
+                 "-//microsoft//dtd internet explorer 2.0 html//",
+                 "-//microsoft//dtd internet explorer 2.0 tables//",
+                 "-//microsoft//dtd internet explorer 3.0 html strict//",
+                 "-//microsoft//dtd internet explorer 3.0 html//",
+                 "-//microsoft//dtd internet explorer 3.0 tables//",
+                 "-//netscape comm. corp.//dtd html//",
+                 "-//netscape comm. corp.//dtd strict html//",
+                 "-//o'reilly and associates//dtd html 2.0//",
+                 "-//o'reilly and associates//dtd html extended 1.0//",
+                 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
+                 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
+                 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
+                 "-//spyglass//dtd html 2.0 extended//",
+                 "-//sq//dtd html 2.0 hotmetal + extensions//",
+                 "-//sun microsystems corp.//dtd hotjava html//",
+                 "-//sun microsystems corp.//dtd hotjava strict html//",
+                 "-//w3c//dtd html 3 1995-03-24//",
+                 "-//w3c//dtd html 3.2 draft//",
+                 "-//w3c//dtd html 3.2 final//",
+                 "-//w3c//dtd html 3.2//",
+                 "-//w3c//dtd html 3.2s draft//",
+                 "-//w3c//dtd html 4.0 frameset//",
+                 "-//w3c//dtd html 4.0 transitional//",
+                 "-//w3c//dtd html experimental 19960712//",
+                 "-//w3c//dtd html experimental 970421//",
+                 "-//w3c//dtd w3 html//",
+                 "-//w3o//dtd w3 html 3.0//",
+                 "-//webtechs//dtd mozilla html 2.0//",
+                 "-//webtechs//dtd mozilla html//"))
+                or publicId in
+                    ("-//w3o//dtd w3 html strict 3.0//en//",
+                     "-/w3c/dtd html 4.0 transitional/en",
+                     "html")
+                or startswithany(publicId,
+                    ("-//w3c//dtd html 4.01 frameset//",
+                     "-//w3c//dtd html 4.01 transitional//")) and
+                    systemId == None
+                or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
+                self.parser.compatMode = "quirks"
+            elif (startswithany(publicId,
+                    ("-//w3c//dtd xhtml 1.0 frameset//",
+                     "-//w3c//dtd xhtml 1.0 transitional//"))
+                  or startswithany(publicId,
+                      ("-//w3c//dtd html 4.01 frameset//",
+                       "-//w3c//dtd html 4.01 transitional//")) and
+                      systemId != None):
+                self.parser.compatMode = "limited quirks"
+
+            self.parser.phase = self.parser.phases["beforeHtml"]
+
+        def anythingElse(self):  # no doctype seen: force quirks mode and move on
+            self.parser.compatMode = "quirks"
+            self.parser.phase = self.parser.phases["beforeHtml"]
+
+        def processCharacters(self, token):
+            self.parser.parseError("expected-doctype-but-got-chars")
+            self.anythingElse()
+            return token
+
+        def processStartTag(self, token):
+            self.parser.parseError("expected-doctype-but-got-start-tag",
+              {"name": token["name"]})
+            self.anythingElse()
+            return token
+
+        def processEndTag(self, token):
+            self.parser.parseError("expected-doctype-but-got-end-tag",
+              {"name": token["name"]})
+            self.anythingElse()
+            return token
+
+        def processEOF(self):
+            self.parser.parseError("expected-doctype-but-got-eof")
+            self.anythingElse()
+            return True
+
+
+    class BeforeHtmlPhase(Phase):
+        """Phase active until the root <html> element has been inserted."""
+        def insertHtmlElement(self):
+            # Insert an implied <html> root, then continue in the "beforeHead" phase.
+            rootToken = impliedTagToken("html", "StartTag")
+            self.tree.insertRoot(rootToken)
+            self.parser.phase = self.parser.phases["beforeHead"]
+
+        def processEOF(self):
+            self.insertHtmlElement()
+            return True
+
+        def processComment(self, token):
+            self.tree.insertComment(token, self.tree.document)
+
+        def processSpaceCharacters(self, token):
+            pass  # whitespace is dropped in this phase
+
+        def processCharacters(self, token):
+            self.insertHtmlElement()
+            return token
+
+        def processStartTag(self, token):
+            if token["name"] == "html":
+                self.parser.firstStartTag = True
+            self.insertHtmlElement()
+            return token
+
+        def processEndTag(self, token):
+            tagName = token["name"]
+            if tagName in ("head", "body", "html", "br"):
+                self.insertHtmlElement()
+                return token
+            self.parser.parseError("unexpected-end-tag-before-html", {"name": tagName})
+
+
+    class BeforeHeadPhase(Phase):
+        """Phase between the <html> start tag and the <head> element."""
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            startHandlers = [("html", self.startTagHtml),
+                             ("head", self.startTagHead)]
+            self.startTagHandler = utils.MethodDispatcher(startHandlers)
+            self.startTagHandler.default = self.startTagOther
+
+            endHandlers = [(("head", "body", "html", "br"), self.endTagImplyHead)]
+            self.endTagHandler = utils.MethodDispatcher(endHandlers)
+            self.endTagHandler.default = self.endTagOther
+
+        def processEOF(self):
+            self.startTagHead(impliedTagToken("head", "StartTag"))  # imply a <head> start tag
+            return True
+
+        def processSpaceCharacters(self, token):
+            pass  # whitespace is ignored in this phase
+
+        def processCharacters(self, token):
+            self.startTagHead(impliedTagToken("head", "StartTag"))  # imply a <head> start tag
+            return token
+
+        def startTagHtml(self, token):
+            inBody = self.parser.phases["inBody"]
+            return inBody.processStartTag(token)
+
+        def startTagHead(self, token):
+            # Insert <head>, remember it as the head pointer, move to "inHead".
+            self.tree.insertElement(token)
+            self.tree.headPointer = self.tree.openElements[-1]
+            self.parser.phase = self.parser.phases["inHead"]
+
+        def startTagOther(self, token):
+            self.startTagHead(impliedTagToken("head", "StartTag"))  # imply a <head> start tag
+            return token
+
+        def endTagImplyHead(self, token):
+            self.startTagHead(impliedTagToken("head", "StartTag"))  # imply a <head> start tag
+            return token
+
+        def endTagOther(self, token):
+            self.parser.parseError("end-tag-after-implied-root", {"name": token["name"]})
+
+    class InHeadPhase(Phase):
+        """Phase handling tokens while the <head> element is open."""
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            startHandlers = [
+                ("html", self.startTagHtml),
+                ("title", self.startTagTitle),
+                (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle),
+                ("script", self.startTagScript),
+                (("base", "basefont", "bgsound", "command", "link"),
+                 self.startTagBaseLinkCommand),
+                ("meta", self.startTagMeta),
+                ("head", self.startTagHead),
+            ]
+            self.startTagHandler = utils.MethodDispatcher(startHandlers)
+            self.startTagHandler.default = self.startTagOther
+
+            endHandlers = [("head", self.endTagHead),
+                           (("br", "html", "body"), self.endTagHtmlBodyBr)]
+            self.endTagHandler = utils.MethodDispatcher(endHandlers)
+            self.endTagHandler.default = self.endTagOther
+
+        def processEOF(self):
+            self.anythingElse()
+            return True
+
+        def processCharacters(self, token):
+            self.anythingElse()
+            return token
+
+        def startTagHtml(self, token):
+            return self.parser.phases["inBody"].processStartTag(token)
+
+        def startTagHead(self, token):
+            self.parser.parseError("two-heads-are-not-better-than-one")
+
+        def startTagBaseLinkCommand(self, token):
+            self.tree.insertElement(token)
+            self.tree.openElements.pop()
+            token["selfClosingAcknowledged"] = True
+
+        def startTagMeta(self, token):
+            self.tree.insertElement(token)
+            self.tree.openElements.pop()
+            token["selfClosingAcknowledged"] = True
+
+            stream = self.parser.tokenizer.stream
+            if stream.charEncoding[1] != "tentative":  # only a tentative encoding is overridden
+                return
+            attributes = token["data"]
+            if "charset" in attributes:
+                stream.changeEncoding(attributes["charset"])
+            elif "content" in attributes:
+                # Encoding as UTF-8 is a hack: really the abstract Unicode string
+                # should go to ContentAttrParser, but UTF-8 can encode every
+                # character and is an ASCII superset, so it works.
+                content = attributes["content"].encode("utf-8")
+                contentParser = inputstream.ContentAttrParser(inputstream.EncodingBytes(content))
+                stream.changeEncoding(contentParser.parse())
+
+        def startTagTitle(self, token):
+            self.parser.parseRCDataRawtext(token, "RCDATA")
+
+        def startTagNoScriptNoFramesStyle(self, token):
+            # Need to decide whether to implement the scripting-disabled case
+            self.parser.parseRCDataRawtext(token, "RAWTEXT")
+
+        def startTagScript(self, token):
+            self.tree.insertElement(token)
+            self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
+            self.parser.originalPhase = self.parser.phase
+            self.parser.phase = self.parser.phases["text"]
+
+        def startTagOther(self, token):
+            self.anythingElse()
+            return token
+
+        def endTagHead(self, token):
+            popped = self.parser.tree.openElements.pop()
+            assert popped.name == "head", "Expected head got %s"%popped.name
+            self.parser.phase = self.parser.phases["afterHead"]
+
+        def endTagHtmlBodyBr(self, token):
+            self.anythingElse()
+            return token
+
+        def endTagOther(self, token):
+            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+        def anythingElse(self):
+            self.endTagHead(impliedTagToken("head"))
+
+
+    # XXX If we implement a parser for which scripting is disabled we need to
+    # implement this phase.
+    #
+    # class InHeadNoScriptPhase(Phase):
+
+    class AfterHeadPhase(Phase):
+        """Phase between the end of <head> and the start of <body>/<frameset>."""
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            headTags = ("base", "basefont", "bgsound", "link", "meta", "noframes",
+                        "script", "style", "title")
+            self.startTagHandler = utils.MethodDispatcher([
+                ("html", self.startTagHtml),
+                ("body", self.startTagBody),
+                ("frameset", self.startTagFrameset),
+                (headTags, self.startTagFromHead),
+                ("head", self.startTagHead),
+            ])
+            self.startTagHandler.default = self.startTagOther
+            endHandlers = [(("body", "html", "br"), self.endTagHtmlBodyBr)]
+            self.endTagHandler = utils.MethodDispatcher(endHandlers)
+            self.endTagHandler.default = self.endTagOther
+
+        def processEOF(self):
+            self.anythingElse()
+            return True
+
+        def processCharacters(self, token):
+            self.anythingElse()
+            return token
+
+        def startTagHtml(self, token):
+            return self.parser.phases["inBody"].processStartTag(token)
+
+        def startTagBody(self, token):
+            self.parser.framesetOK = False
+            self.tree.insertElement(token)
+            self.parser.phase = self.parser.phases["inBody"]
+
+        def startTagFrameset(self, token):
+            self.tree.insertElement(token)
+            self.parser.phase = self.parser.phases["inFrameset"]
+
+        def startTagFromHead(self, token):
+            self.parser.parseError("unexpected-start-tag-out-of-my-head", {"name": token["name"]})
+            self.tree.openElements.append(self.tree.headPointer)  # re-open <head> temporarily
+            self.parser.phases["inHead"].processStartTag(token)
+            for node in self.tree.openElements[::-1]:
+                if node.name == "head":
+                    self.tree.openElements.remove(node)
+                    break
+
+        def startTagHead(self, token):
+            self.parser.parseError("unexpected-start-tag", {"name":token["name"]})
+
+        def startTagOther(self, token):
+            self.anythingElse()
+            return token
+
+        def endTagHtmlBodyBr(self, token):
+            self.anythingElse()
+            return token
+
+        def endTagOther(self, token):
+            self.parser.parseError("unexpected-end-tag", {"name":token["name"]})
+
+        def anythingElse(self):
+            self.tree.insertElement(impliedTagToken("body", "StartTag"))
+            self.parser.phase = self.parser.phases["inBody"]
+            self.parser.framesetOK = True
+
+
+    class InBodyPhase(Phase):
+        # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
+        # the really-really-really-very crazy mode
+        def __init__(self, parser, tree):  # builds the start-tag and end-tag dispatch tables for this phase
+            Phase.__init__(self, parser, tree)
+
+            #Keep a ref to this for special handling of whitespace in <pre>
+            self.processSpaceCharactersNonPre = self.processSpaceCharacters
+
+            self.startTagHandler = utils.MethodDispatcher([
+                ("html", self.startTagHtml),
+                (("base", "basefont", "bgsound", "command", "link", "meta",
+                  "noframes", "script", "style", "title"),
+                 self.startTagProcessInHead),
+                ("body", self.startTagBody),
+                ("frameset", self.startTagFrameset),
+                (("address", "article", "aside", "blockquote", "center", "details",
+                  "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",  # NOTE(review): "details" is listed twice
+                  "footer", "header", "hgroup", "menu", "nav", "ol", "p",
+                  "section", "summary", "ul"),
+                  self.startTagCloseP),
+                (headingElements, self.startTagHeading),
+                (("pre", "listing"), self.startTagPreListing),
+                ("form", self.startTagForm),
+                (("li", "dd", "dt"), self.startTagListItem),
+                ("plaintext",self.startTagPlaintext),
+                ("a", self.startTagA),
+                (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
+                  "strong", "tt", "u"),self.startTagFormatting),
+                ("nobr", self.startTagNobr),
+                ("button", self.startTagButton),
+                (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
+                ("xmp", self.startTagXmp),
+                ("table", self.startTagTable),
+                (("area", "br", "embed", "img", "keygen", "wbr"),
+                 self.startTagVoidFormatting),
+                (("param", "source", "track"), self.startTagParamSource),
+                ("input", self.startTagInput),
+                ("hr", self.startTagHr),
+                ("image", self.startTagImage),
+                ("isindex", self.startTagIsIndex),
+                ("textarea", self.startTagTextarea),
+                ("iframe", self.startTagIFrame),
+                (("noembed", "noframes", "noscript"), self.startTagRawtext),  # NOTE(review): "noframes" also appears in the first tuple; which handler wins depends on MethodDispatcher -- confirm
+                ("select", self.startTagSelect),
+                (("rp", "rt"), self.startTagRpRt),
+                (("option", "optgroup"), self.startTagOpt),
+                (("math"), self.startTagMath),  # ("math") is a plain string, not a 1-tuple
+                (("svg"), self.startTagSvg),  # likewise a plain string
+                (("caption", "col", "colgroup", "frame", "head",
+                  "tbody", "td", "tfoot", "th", "thead",
+                  "tr"), self.startTagMisplaced)
+            ])
+            self.startTagHandler.default = self.startTagOther
+
+            self.endTagHandler = utils.MethodDispatcher([
+                ("body",self.endTagBody),
+                ("html",self.endTagHtml),
+                (("address", "article", "aside", "blockquote", "center",
+                  "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
+                  "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre",
+                  "section", "summary", "ul"), self.endTagBlock),
+                ("form", self.endTagForm),
+                ("p",self.endTagP),
+                (("dd", "dt", "li"), self.endTagListItem),
+                (headingElements, self.endTagHeading),
+                (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
+                  "strike", "strong", "tt", "u"), self.endTagFormatting),
+                (("applet",  "marquee", "object"), self.endTagAppletMarqueeObject),
+                ("br", self.endTagBr),
+                ])
+            self.endTagHandler.default = self.endTagOther
+
+        def isMatchingFormattingElement(self, node1, node2):
+            """Return True when both nodes share name, namespace and attributes."""
+            sameTag = (node1.name == node2.name and
+                       node1.namespace == node2.namespace)
+            if not sameTag:
+                return False
+            if len(node1.attributes) != len(node2.attributes):
+                return False
+            # Equal-length sorted item lists must agree pair by pair.
+            pairs = zip(sorted(node1.attributes.items()),
+                        sorted(node2.attributes.items()))
+            return all(first == second for first, second in pairs)
+
+        # helper
+        def addFormattingElement(self, token):
+            """Insert token's element and append it to activeFormattingElements, dropping the earliest of three matches after the last Marker."""
+            self.tree.insertElement(token)
+            element = self.tree.openElements[-1]
+            formatting = self.tree.activeFormattingElements
+            duplicates = []
+            for candidate in formatting[::-1]:
+                if candidate is Marker:
+                    break
+                if self.isMatchingFormattingElement(candidate, element):
+                    duplicates.append(candidate)
+            assert len(duplicates) <= 3
+            if len(duplicates) == 3:
+                formatting.remove(duplicates[-1])
+            formatting.append(element)
+
+        # the real deal
+        def processEOF(self):
+            """Report one parse error if any open element is not allowed to stay open at EOF."""
+            mayStayOpen = frozenset(("dd", "dt", "li", "p", "tbody", "td", "tfoot",
+                                     "th", "thead", "tr", "body", "html"))
+            unclosed = any(node.name not in mayStayOpen
+                           for node in self.tree.openElements[::-1])
+            if unclosed:
+                self.parser.parseError("expected-closing-tag-but-got-eof")
+            # Stop parsing
+
+        def processSpaceCharactersDropNewline(self, token):
+            # One-shot handler for the start of <pre>, <listing> and <textarea>
+            # blocks: drops a leading newline and restores the regular handler.
+            data = token["data"]
+            self.processSpaceCharacters = self.processSpaceCharactersNonPre
+            if data.startswith("\n"):
+                current = self.tree.openElements[-1]
+                if current.name in ("pre", "listing", "textarea") and not current.hasContent():
+                    data = data[1:]
+            if data:
+                self.tree.reconstructActiveFormattingElements()
+                self.tree.insertText(data)
+
+        def processCharacters(self, token):
+            """Insert character data; any non-space character clears parser.framesetOK."""
+            text = token["data"]
+            if text == u"\u0000":
+                # The tokenizer should always emit null on its own
+                return
+            self.tree.reconstructActiveFormattingElements()
+            self.tree.insertText(text)
+            # This must be bad for performance
+            if self.parser.framesetOK and any(c not in spaceCharacters for c in text):
+                self.parser.framesetOK = False
+
+        def processSpaceCharacters(self, token):  # regular whitespace handler for this phase
+            self.tree.reconstructActiveFormattingElements()  # done before any text is inserted
+            self.tree.insertText(token["data"])
+
+        def startTagProcessInHead(self, token):  # hand the start tag to the inHead phase
+            return self.parser.phases["inHead"].processStartTag(token)
+
+        def startTagBody(self, token):  # stray <body>: merge attributes into the existing one
+            self.parser.parseError("unexpected-start-tag", {"name": "body"})
+            stack = self.tree.openElements
+            if len(stack) == 1 or stack[1].name != "body":
+                assert self.parser.innerHTML  # no body element at stack[1]
+                return
+            self.parser.framesetOK = False
+            for attr, value in token["data"].iteritems():
+                if attr not in stack[1].attributes:  # existing attributes win
+                    stack[1].attributes[attr] = value
+
+        def startTagFrameset(self, token):  # <frameset> seen while in this phase
+            self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
+            stack = self.tree.openElements
+            if len(stack) == 1 or stack[1].name != "body":
+                assert self.parser.innerHTML
+                return
+            if self.parser.framesetOK:  # otherwise the token is ignored
+                if stack[1].parent:  # detach the body element from the tree
+                    stack[1].parent.removeChild(stack[1])
+                while stack[-1].name != "html":  # unwind the stack down to <html>
+                    stack.pop()
+                self.tree.insertElement(token)
+                self.parser.phase = self.parser.phases["inFrameset"]
+
+        def startTagCloseP(self, token):
+            if self.tree.elementInScope("p", variant="button"):  # open <p> in button scope
+                self.endTagP(impliedTagToken("p"))  # close it with an implied </p>
+            self.tree.insertElement(token)
+
+        def startTagPreListing(self, token):
+            if self.tree.elementInScope("p", variant="button"):
+                self.endTagP(impliedTagToken("p"))  # close an open <p> with an implied </p>
+            self.tree.insertElement(token)
+            self.parser.framesetOK = False  # next: install the newline-dropping handler
+            self.processSpaceCharacters = self.processSpaceCharactersDropNewline
+
+        def startTagForm(self, token):  # a form is only opened when none is tracked yet
+            if self.tree.formPointer:
+                self.parser.parseError(u"unexpected-start-tag", {"name": "form"})
+                return
+            if self.tree.elementInScope("p", variant="button"):
+                self.endTagP(impliedTagToken("p"))
+            self.tree.insertElement(token)
+            self.tree.formPointer = self.tree.openElements[-1]
+
+        def startTagListItem(self, token):
+            # Opening <li>/<dt>/<dd> first closes a matching item that is still open.
+            self.parser.framesetOK = False
+
+            definitionItems = ["dt", "dd"]
+            closers = {"li": ["li"], "dt": definitionItems, "dd": definitionItems}
+            closesOn = closers[token["name"]]
+            transparent = ("address", "div", "p")
+            for node in reversed(self.tree.openElements):
+                if node.name in closesOn:
+                    endToken = impliedTagToken(node.name, "EndTag")
+                    self.parser.phase.processEndTag(endToken)
+                    break
+                # Any other special element ends the search without closing anything.
+                if node.nameTuple in specialElements and node.name not in transparent:
+                    break
+
+            if self.tree.elementInScope("p", variant="button"):
+                self.parser.phase.processEndTag(impliedTagToken("p", "EndTag"))
+
+            self.tree.insertElement(token)
+
+        def startTagPlaintext(self, token):
+            if self.tree.elementInScope("p", variant="button"):
+                self.endTagP(impliedTagToken("p"))  # close an open <p> with an implied </p>
+            self.tree.insertElement(token)  # next: tokenizer goes to its plaintext state
+            self.parser.tokenizer.state = self.parser.tokenizer.plaintextState
+
+        def startTagHeading(self, token):
+            if self.tree.elementInScope("p", variant="button"):
+                self.endTagP(impliedTagToken("p"))  # close an open <p> with an implied </p>
+            if self.tree.openElements[-1].name in headingElements:  # nested heading
+                self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
+                self.tree.openElements.pop()  # drop the outer heading from the stack
+            self.tree.insertElement(token)
+
+        def startTagA(self, token):
+            afeAElement = self.tree.elementInActiveFormattingElements("a")  # earlier <a>?
+            if afeAElement:
+                self.parser.parseError("unexpected-start-tag-implies-end-tag",
+                  {"startName": "a", "endName": "a"})
+                self.endTagFormatting(impliedTagToken("a"))  # act as if </a> was seen
+                if afeAElement in self.tree.openElements:  # still open: remove it
+                    self.tree.openElements.remove(afeAElement)
+                if afeAElement in self.tree.activeFormattingElements:  # same for this list
+                    self.tree.activeFormattingElements.remove(afeAElement)
+            self.tree.reconstructActiveFormattingElements()
+            self.addFormattingElement(token)
+
+        def startTagFormatting(self, token):  # reconstruct, then add via addFormattingElement
+            self.tree.reconstructActiveFormattingElements()
+            self.addFormattingElement(token)
+
+        def startTagNobr(self, token):
+            self.tree.reconstructActiveFormattingElements()
+            if self.tree.elementInScope("nobr"):  # nested <nobr>: close the open one first
+                self.parser.parseError("unexpected-start-tag-implies-end-tag",
+                  {"startName": "nobr", "endName": "nobr"})
+                self.processEndTag(impliedTagToken("nobr"))
+                # XXX Need tests that trigger the following
+                self.tree.reconstructActiveFormattingElements()
+            self.addFormattingElement(token)
+
+        def startTagButton(self, token):
+            if not self.tree.elementInScope("button"):
+                self.tree.reconstructActiveFormattingElements()
+                self.tree.insertElement(token)
+                self.parser.framesetOK = False
+                return
+            self.parser.parseError("unexpected-start-tag-implies-end-tag",
+              {"startName": "button", "endName": "button"})
+            self.processEndTag(impliedTagToken("button"))
+            return token  # nested <button>: open one closed, token handed back to the caller
+
+        def startTagAppletMarqueeObject(self, token):
+            self.tree.reconstructActiveFormattingElements()
+            self.tree.insertElement(token)
+            self.tree.activeFormattingElements.append(Marker)  # Marker entry after the insert
+            self.parser.framesetOK = False
+
+        def startTagXmp(self, token):
+            if self.tree.elementInScope("p", variant="button"):
+                self.endTagP(impliedTagToken("p"))  # close an open <p> with an implied </p>
+            self.tree.reconstructActiveFormattingElements()
+            self.parser.framesetOK = False
+            self.parser.parseRCDataRawtext(token, "RAWTEXT")  # content handled as RAWTEXT
+
+        def startTagTable(self, token):  # outside quirks mode an open <p> is closed first
+            if (self.parser.compatMode != "quirks" and
+                self.tree.elementInScope("p", variant="button")):
+                self.processEndTag(impliedTagToken("p"))
+            self.tree.insertElement(token)
+            self.parser.framesetOK = False
+            self.parser.phase = self.parser.phases["inTable"]
+
+        def startTagVoidFormatting(self, token):
+            self.tree.reconstructActiveFormattingElements()
+            self.tree.insertElement(token)
+            self.tree.openElements.pop()  # inserted, but not left on the open-element stack
+            token["selfClosingAcknowledged"] = True
+            self.parser.framesetOK = False
+
+        def startTagInput(self, token):
+            framesetOK = self.parser.framesetOK  # saved: startTagVoidFormatting clears it
+            self.startTagVoidFormatting(token)
+            if ("type" in token["data"] and
+                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+                # input type=hidden doesn't change framesetOK, so restore the saved value
+                self.parser.framesetOK = framesetOK
+
+        def startTagParamSource(self, token):
+            self.tree.insertElement(token)
+            self.tree.openElements.pop()  # inserted, but not left on the open-element stack
+            token["selfClosingAcknowledged"] = True
+
+        def startTagHr(self, token):
+            if self.tree.elementInScope("p", variant="button"):
+                self.endTagP(impliedTagToken("p"))  # close an open <p> with an implied </p>
+            self.tree.insertElement(token)
+            self.tree.openElements.pop()  # inserted, but not left on the open-element stack
+            token["selfClosingAcknowledged"] = True
+            self.parser.framesetOK = False
+
+        def startTagImage(self, token):
+            # <image> is reported, then reprocessed as <img> with the same attributes.
+            self.parser.parseError("unexpected-start-tag-treated-as",
+              {"originalName": "image", "newName": "img"})
+            self.processStartTag(impliedTagToken("img", "StartTag",
+                                                 attributes=token["data"],
+                                                 selfClosing=token["selfClosing"]))
+
+        def startTagIsIndex(self, token):  # expands <isindex> into form/hr/label/input/hr
+            self.parser.parseError("deprecated-tag", {"name": "isindex"})
+            if self.tree.formPointer:  # a form is already tracked: ignore the token
+                return
+            form_attrs = {}
+            if "action" in token["data"]:  # only "action" is carried over to the form
+                form_attrs["action"] = token["data"]["action"]
+            self.processStartTag(impliedTagToken("form", "StartTag",
+                                                 attributes=form_attrs))
+            self.processStartTag(impliedTagToken("hr", "StartTag"))
+            self.processStartTag(impliedTagToken("label", "StartTag"))
+            # XXX Localization: the default prompt below is a hard-coded English string
+            if "prompt" in token["data"]:
+                prompt = token["data"]["prompt"]
+            else:
+                prompt = u"This is a searchable index. Enter search keywords: "
+            self.processCharacters(
+                {"type":tokenTypes["Characters"], "data":prompt})
+            attributes = token["data"].copy()  # copy, so the token's own data is untouched
+            if "action" in attributes:
+                del attributes["action"]
+            if "prompt" in attributes:
+                del attributes["prompt"]
+            attributes["name"] = "isindex"  # the synthesized input is always named "isindex"
+            self.processStartTag(impliedTagToken("input", "StartTag", 
+                                                 attributes = attributes,
+                                                 selfClosing = 
+                                                 token["selfClosing"]))
+            self.processEndTag(impliedTagToken("label"))
+            self.processStartTag(impliedTagToken("hr", "StartTag"))
+            self.processEndTag(impliedTagToken("form"))
+
+        def startTagTextarea(self, token):
+            self.tree.insertElement(token)
+            self.parser.tokenizer.state = self.parser.tokenizer.rcdataState  # tokenizer -> RCDATA
+            self.processSpaceCharacters = self.processSpaceCharactersDropNewline
+            self.parser.framesetOK = False
+
+        def startTagIFrame(self, token):  # startTagRawtext, plus framesetOK is cleared
+            self.parser.framesetOK = False
+            self.startTagRawtext(token)
+
+        def startTagRawtext(self, token):
+            """Parse as RAWTEXT: iframe, noembed, noframes, noscript (if scripting enabled)"""
+            self.parser.parseRCDataRawtext(token, "RAWTEXT")
+
+        def startTagOpt(self, token):
+            if self.tree.openElements[-1].name == "option":  # an open <option> is closed first
+                self.parser.phase.processEndTag(impliedTagToken("option"))
+            self.tree.reconstructActiveFormattingElements()
+            self.parser.tree.insertElement(token)  # NOTE(review): same object as self.tree?
+
+        def startTagSelect(self, token):
+            self.tree.reconstructActiveFormattingElements()
+            self.tree.insertElement(token)
+            self.parser.framesetOK = False
+            phases = self.parser.phases
+            tablePhaseNames = ("inTable", "inCaption", "inColumnGroup",
+                               "inTableBody", "inRow", "inCell")
+            # A select opened while a table-related phase is current gets its own phase.
+            if self.parser.phase in [phases[name] for name in tablePhaseNames]:
+                nextPhase = "inSelectInTable"
+            else:
+                nextPhase = "inSelect"
+            self.parser.phase = phases[nextPhase]
+
+        def startTagRpRt(self, token):
+            if self.tree.elementInScope("ruby"):
+                self.tree.generateImpliedEndTags()
+                if self.tree.openElements[-1].name != "ruby":  # something else still on top
+                    self.parser.parseError()  # no error code is passed here
+            self.tree.insertElement(token)
+
+        def startTagMath(self, token):
+            self.tree.reconstructActiveFormattingElements()
+            self.parser.adjustMathMLAttributes(token)
+            self.parser.adjustForeignAttributes(token)
+            token["namespace"] = namespaces["mathml"]  # element is inserted in the MathML namespace
+            self.tree.insertElement(token)
+            # TODO: get the parse error right for the case where the token
+            # has a namespace not equal to the xmlns attribute
+            if token["selfClosing"]:  # <math/>: take it straight back off the stack
+                self.tree.openElements.pop()
+                token["selfClosingAcknowledged"] = True
+
+        def startTagSvg(self, token):
+            self.tree.reconstructActiveFormattingElements()
+            self.parser.adjustSVGAttributes(token)
+            self.parser.adjustForeignAttributes(token)
+            token["namespace"] = namespaces["svg"]  # element is inserted in the SVG namespace
+            self.tree.insertElement(token)
+            # TODO: get the parse error right for the case where the token
+            # has a namespace not equal to the xmlns attribute
+            if token["selfClosing"]:  # <svg/>: take it straight back off the stack
+                self.tree.openElements.pop()
+                token["selfClosingAcknowledged"] = True
+
+        def startTagMisplaced(self, token):
+            """Report and ignore start tags for elements that should be children
+            of other elements handled in a different insertion mode. As listed:
+            "caption", "col", "colgroup", "frame", "frameset", "head",
+            "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
+            "tr", "noscript"
+            """
+            self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]})
+
+        def startTagOther(self, token):  # default handling: reconstruct, then insert
+            self.tree.reconstructActiveFormattingElements()
+            self.tree.insertElement(token)
+
+        def endTagP(self, token):
+            # Close the nearest <p> in button scope, creating an empty one if needed.
+            if self.tree.elementInScope("p", variant="button"):
+                self.tree.generateImpliedEndTags("p")
+                if self.tree.openElements[-1].name != "p":
+                    self.parser.parseError("unexpected-end-tag", {"name": "p"})
+                while self.tree.openElements.pop().name != "p":  # pop through that <p>
+                    pass
+            else:  # nothing to close: open an implied <p>, report, then close it
+                self.startTagCloseP(impliedTagToken("p", "StartTag"))
+                self.parser.parseError("unexpected-end-tag", {"name": "p"})
+                self.endTagP(impliedTagToken("p", "EndTag"))
+
+        def endTagBody(self, token):
+            if not self.tree.elementInScope("body"):
+                self.parser.parseError()
+                return
+            # Names that do not trigger the mismatch error below.
+            tolerated = frozenset(("dd", "dt", "li", "optgroup", "option", "p",
+                                   "rp", "rt", "tbody", "td", "tfoot", "th",
+                                   "thead", "tr", "body", "html"))
+            if self.tree.openElements[-1].name != "body":
+                for node in self.tree.openElements[2:]:
+                    if node.name in tolerated:
+                        continue
+                    #Not sure this is the correct name for the parse error
+                    self.parser.parseError("expected-one-end-tag-but-got-another",
+                        {"expectedName": "body", "gotName": node.name})
+                    break
+            self.parser.phase = self.parser.phases["afterBody"]
+
+        def endTagHtml(self, token):
+            # The "is the body end tag ignored?" test is repeated here
+            if self.tree.elementInScope("body"):
+                self.endTagBody(impliedTagToken("body"))  # act as if </body> came first
+                return token  # token handed back to the caller (presumably to reprocess)
+
+        def endTagBlock(self, token):
+            name = token["name"]
+            #Put us back in the right whitespace handling mode
+            if name == "pre":
+                self.processSpaceCharacters = self.processSpaceCharactersNonPre
+            inScope = self.tree.elementInScope(name)
+            if inScope:
+                self.tree.generateImpliedEndTags()
+            if self.tree.openElements[-1].name != name:
+                self.parser.parseError("end-tag-too-early", {"name": name})
+            if inScope:
+                while self.tree.openElements.pop().name != name:  # pop through the match
+                    pass
+
+        def endTagForm(self, token):
+            node = self.tree.formPointer
+            self.tree.formPointer = None  # pointer is cleared whether or not the form closes
+            if node is None or not self.tree.elementInScope(node):
+                self.parser.parseError("unexpected-end-tag",
+                                       {"name":"form"})
+            else:
+                self.tree.generateImpliedEndTags()
+                if self.tree.openElements[-1] != node:
+                    self.parser.parseError("end-tag-too-early-ignored",
+                                           {"name": "form"})
+                self.tree.openElements.remove(node)  # only the form leaves the stack
+
+        def endTagListItem(self, token):
+            name = token["name"]
+            # </li> is looked up in "list" scope; any other name uses the
+            # default scope (variant=None).
+            variant = "list" if name == "li" else None
+            if not self.tree.elementInScope(name, variant=variant):
+                self.parser.parseError("unexpected-end-tag", {"name": name})
+                return
+
+            self.tree.generateImpliedEndTags(exclude = name)
+            if self.tree.openElements[-1].name != name:
+                self.parser.parseError("end-tag-too-early", {"name": name})
+
+            # Pop up to and including the nearest element with this name.
+            while self.tree.openElements.pop().name != name:
+                pass
+
+        def endTagHeading(self, token):
+            # A heading end tag closes the nearest open heading of any level, so
+            # scope is tested against every name in headingElements.
+            inScope = self.tree.elementInScope
+            if any(inScope(level) for level in headingElements):
+                self.tree.generateImpliedEndTags()
+            if self.tree.openElements[-1].name != token["name"]:
+                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
+
+            # The scope test is repeated after implied end tags were generated.
+            if any(inScope(level) for level in headingElements):
+                # Pop through the first heading element found on the stack.
+                while self.tree.openElements.pop().name not in headingElements:
+                    pass
+
+        def endTagFormatting(self, token):
+            """The much-feared adoption agency algorithm"""
+            # http://www.whatwg.org/specs/web-apps/current-work/#adoptionAgency
+            # XXX Better parseError messages appreciated.
+            name = token["name"]  # NOTE(review): unused below; token["name"] is read directly
+
+            outerLoopCounter = 0
+            while outerLoopCounter < 8:  # the outer loop runs at most 8 times
+                outerLoopCounter += 1
+
+                # Step 1 paragraph 1
+                formattingElement = self.tree.elementInActiveFormattingElements(
+                    token["name"])
+                if (not formattingElement or 
+                    (formattingElement in self.tree.openElements and
+                     not self.tree.elementInScope(formattingElement.name))):
+                    self.parser.parseError("adoption-agency-1.1", {"name": token["name"]})
+                    return
+
+                # Step 1 paragraph 2
+                elif formattingElement not in self.tree.openElements:
+                    self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
+                    self.tree.activeFormattingElements.remove(formattingElement)
+                    return
+
+                # Step 1 paragraph 3
+                if formattingElement != self.tree.openElements[-1]:
+                    self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})
+
+                # Step 2
+                # Start of the adoption agency algorithm proper
+                afeIndex = self.tree.openElements.index(formattingElement)
+                furthestBlock = None
+                for element in self.tree.openElements[afeIndex:]:
+                    if element.nameTuple in specialElements:
+                        furthestBlock = element
+                        break
+                # Step 3
+                if furthestBlock is None:
+                    element = self.tree.openElements.pop()
+                    while element != formattingElement:
+                        element = self.tree.openElements.pop()
+                    self.tree.activeFormattingElements.remove(element)
+                    return
+                commonAncestor = self.tree.openElements[afeIndex-1]
+
+                # (disabled) detach furthestBlock from its parent:
+                #if furthestBlock.parent:
+                #    furthestBlock.parent.removeChild(furthestBlock)
+
+                # Step 5
+                # The bookmark is supposed to help us identify where to reinsert
+                # nodes in step 11. We have to ensure that we reinsert nodes after
+                # the node before the active formatting element. Note the bookmark
+                # can move in step 6.4
+                bookmark = self.tree.activeFormattingElements.index(formattingElement)
+
+                # Step 6
+                lastNode = node = furthestBlock
+                innerLoopCounter = 0
+                
+                index = self.tree.openElements.index(node)
+                while innerLoopCounter < 3:  # the inner loop runs at most 3 times
+                    innerLoopCounter += 1
+                    # Node is element before node in open elements
+                    index -= 1
+                    node = self.tree.openElements[index]
+                    if node not in self.tree.activeFormattingElements:
+                        self.tree.openElements.remove(node)
+                        continue
+                    # Step 6.3
+                    if node == formattingElement:
+                        break
+                    # Step 6.4
+                    if lastNode == furthestBlock:
+                        bookmark = (self.tree.activeFormattingElements.index(node)
+                                    + 1)
+                    # Step 6.5
+                    #cite = node.parent
+                    clone = node.cloneNode()
+                    # Replace node with clone
+                    self.tree.activeFormattingElements[
+                        self.tree.activeFormattingElements.index(node)] = clone
+                    self.tree.openElements[
+                        self.tree.openElements.index(node)] = clone
+                    node = clone
+
+                    # Step 6.6
+                    # Remove lastNode from its parents, if any
+                    if lastNode.parent:
+                        lastNode.parent.removeChild(lastNode)
+                    node.appendChild(lastNode)
+                    # Step 6.7
+                    lastNode = node
+                    # End of inner loop 
+
+                # Step 7
+                # If commonAncestor is a table, tbody, tfoot, thead, or tr,
+                # lastNode is foster parented; otherwise it is appended to
+                # commonAncestor.
+                if lastNode.parent:
+                    lastNode.parent.removeChild(lastNode)
+
+                if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
+                    parent, insertBefore = self.tree.getTableMisnestedNodePosition()
+                    parent.insertBefore(lastNode, insertBefore)
+                else:
+                    commonAncestor.appendChild(lastNode)
+
+                # Step 8
+                clone = formattingElement.cloneNode()
+
+                # Step 9
+                furthestBlock.reparentChildren(clone)
+
+                # Step 10
+                furthestBlock.appendChild(clone)
+
+                # Step 11
+                self.tree.activeFormattingElements.remove(formattingElement)
+                self.tree.activeFormattingElements.insert(bookmark, clone)
+
+                # Step 12
+                self.tree.openElements.remove(formattingElement)
+                self.tree.openElements.insert(
+                  self.tree.openElements.index(furthestBlock) + 1, clone)
+
+        def endTagAppletMarqueeObject(self, token):
+            if self.tree.elementInScope(token["name"]):
+                self.tree.generateImpliedEndTags()
+            if self.tree.openElements[-1].name != token["name"]:
+                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
+
+            if self.tree.elementInScope(token["name"]):  # scope is tested a second time
+                element = self.tree.openElements.pop()
+                while element.name != token["name"]:  # pop through the matching element
+                    element = self.tree.openElements.pop()
+                self.tree.clearActiveFormattingElements()
+
+        def endTagBr(self, token):  # </br> is reported and then handled like a <br> start tag
+            self.parser.parseError("unexpected-end-tag-treated-as",
+              {"originalName": "br", "newName": "br element"})
+            self.tree.reconstructActiveFormattingElements()
+            self.tree.insertElement(impliedTagToken("br", "StartTag"))
+            self.tree.openElements.pop()  # inserted, but not left on the open-element stack
+
+        def endTagOther(self, token):
+            name = token["name"]
+            for node in self.tree.openElements[::-1]:
+                if node.name == name:
+                    self.tree.generateImpliedEndTags(exclude=name)
+                    if self.tree.openElements[-1].name != name:
+                        self.parser.parseError("unexpected-end-tag", {"name": name})
+                    while self.tree.openElements.pop() != node:  # unwind through the match
+                        pass
+                    break
+                if node.nameTuple in specialElements:  # special element reached first: give up
+                    self.parser.parseError("unexpected-end-tag", {"name": name})
+                    break
+
+    class TextPhase(Phase):  # text-only phase; every exit restores parser.originalPhase
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+            self.startTagHandler = utils.MethodDispatcher([])
+            self.startTagHandler.default = self.startTagOther
+            self.endTagHandler = utils.MethodDispatcher([("script", self.endTagScript)])
+            self.endTagHandler.default = self.endTagOther
+
+        def processCharacters(self, token):  # text is inserted verbatim
+            self.tree.insertText(token["data"])
+
+        def processEOF(self):
+            # datavars must be a mapping, as in every other parseError call in this file
+            self.parser.parseError("expected-named-closing-tag-but-got-eof",
+                                   {"name": self.tree.openElements[-1].name})
+            self.tree.openElements.pop()
+            self.parser.phase = self.parser.originalPhase
+            return True
+
+        def startTagOther(self, token):  # no start tag is expected in this phase
+            assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode"%token['name']
+
+        def endTagScript(self, token):
+            node = self.tree.openElements.pop()
+            assert node.name == "script"
+            self.parser.phase = self.parser.originalPhase
+            #The rest of this method is all stuff that only happens if
+            #document.write works
+
+        def endTagOther(self, token):  # any other end tag closes the current element
+            self.tree.openElements.pop()
+            self.parser.phase = self.parser.originalPhase
+
+    class InTablePhase(Phase):
+        # http://www.whatwg.org/specs/web-apps/current-work/#in-table
+        def __init__(self, parser, tree):  # builds the start-tag and end-tag dispatch tables
+            Phase.__init__(self, parser, tree)
+            self.startTagHandler = utils.MethodDispatcher([
+                ("html", self.startTagHtml),
+                ("caption", self.startTagCaption),
+                ("colgroup", self.startTagColgroup),
+                ("col", self.startTagCol),
+                (("tbody", "tfoot", "thead"), self.startTagRowGroup),
+                (("td", "th", "tr"), self.startTagImplyTbody),
+                ("table", self.startTagTable),
+                (("style", "script"), self.startTagStyleScript),
+                ("input", self.startTagInput),
+                ("form", self.startTagForm)
+            ])
+            self.startTagHandler.default = self.startTagOther  # any start tag not listed above
+
+            self.endTagHandler = utils.MethodDispatcher([
+                ("table", self.endTagTable),
+                (("body", "caption", "col", "colgroup", "html", "tbody", "td",
+                  "tfoot", "th", "thead", "tr"), self.endTagIgnore)
+            ])
+            self.endTagHandler.default = self.endTagOther  # any end tag not listed above
+
+        # helper methods
+        def clearStackToTableContext(self):
+            # "clear the stack back to a table context": pop until <table> or <html> is on top
+            while self.tree.openElements[-1].name not in ("table", "html"):
+                #self.parser.parseError("unexpected-implied-end-tag-in-table",
+                #  {"name":  self.tree.openElements[-1].name})
+                self.tree.openElements.pop()
+            # When the current node is <html> it's an innerHTML case
+
+        # processing methods
+        def processEOF(self):
+            if self.tree.openElements[-1].name != "html":  # something is still open at EOF
+                self.parser.parseError("eof-in-table")
+            else:
+                assert self.parser.innerHTML
+            # Stop parsing
+
+        def processSpaceCharacters(self, token):  # divert to inTableText, remembering the phase
+            textPhase = self.parser.phases["inTableText"]
+            textPhase.originalPhase = self.parser.phase
+            self.parser.phase = textPhase
+            textPhase.processSpaceCharacters(token)
+
+        def processCharacters(self, token):  # divert to inTableText, remembering the phase
+            textPhase = self.parser.phases["inTableText"]
+            textPhase.originalPhase = self.parser.phase
+            self.parser.phase = textPhase
+            textPhase.processCharacters(token)
+
+        def insertText(self, token):
+            #If we get here there must be at least one non-whitespace character
+            # Do the table magic: inBody handles the text while insertFromTable is set
+            self.tree.insertFromTable = True
+            self.parser.phases["inBody"].processCharacters(token)
+            self.tree.insertFromTable = False
+
+        def startTagCaption(self, token):
+            self.clearStackToTableContext()
+            self.tree.activeFormattingElements.append(Marker)  # Marker entry precedes the caption
+            self.tree.insertElement(token)
+            self.parser.phase = self.parser.phases["inCaption"]
+
+        def startTagColgroup(self, token):
+            self.clearStackToTableContext()  # pop back to <table> (or <html>) first
+            self.tree.insertElement(token)
+            self.parser.phase = self.parser.phases["inColumnGroup"]
+
+        def startTagCol(self, token):
+            self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))  # implied <colgroup>
+            return token  # token handed back to the caller (presumably to be reprocessed)
+
+        def startTagRowGroup(self, token):
+            self.clearStackToTableContext()  # pop back to <table> (or <html>) first
+            self.tree.insertElement(token)
+            self.parser.phase = self.parser.phases["inTableBody"]
+
+        def startTagImplyTbody(self, token):
+            self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))  # implied <tbody>
+            return token  # token handed back to the caller (presumably to be reprocessed)
+
+        def startTagTable(self, token):  # <table> inside <table>: close the open one first
+            self.parser.parseError("unexpected-start-tag-implies-end-tag",
+              {"startName": "table", "endName": "table"})
+            self.parser.phase.processEndTag(impliedTagToken("table"))
+            if not self.parser.innerHTML:  # with innerHTML set, None is returned instead
+                return token
+
+        def startTagStyleScript(self, token):  # <style>/<script>: handled by the inHead phase
+            return self.parser.phases["inHead"].processStartTag(token)
+
+        def startTagInput(self, token):
+            if ("type" in token["data"] and 
+                token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+                self.parser.parseError("unexpected-hidden-input-in-table")
+                self.tree.insertElement(token)  # hidden input is inserted directly
+                # XXX associate with form
+                self.tree.openElements.pop()
+            else:
+                self.startTagOther(token)  # any other input takes the generic path
+
+        def startTagForm(self, token):  # always a parse error; the form is inserted only if none is tracked yet
+            self.parser.parseError("unexpected-form-in-table")
+            if self.tree.formPointer is None:
+                self.tree.insertElement(token)
+                self.tree.formPointer = self.tree.openElements[-1]  # remember the element that was just inserted
+                self.tree.openElements.pop()  # then take it off the open-elements stack again
+
+        def startTagOther(self, token):  # also called by startTagInput for inputs that are not type="hidden"
+            self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
+            # Let "inBody" process the tag with tree.insertFromTable set for the duration of the call.
+            self.tree.insertFromTable = True
+            self.parser.phases["inBody"].processStartTag(token)
+            self.tree.insertFromTable = False
+
+        def endTagTable(self, token):  # close the table if one is in table scope, else report an error
+            if self.tree.elementInScope("table", variant="table"):
+                self.tree.generateImpliedEndTags()
+                if self.tree.openElements[-1].name != "table":  # something other than the table is still open
+                    self.parser.parseError("end-tag-too-early-named",
+                      {"gotName": "table",
+                       "expectedName": self.tree.openElements[-1].name})
+                while self.tree.openElements[-1].name != "table":  # drop everything above the table
+                    self.tree.openElements.pop()
+                self.tree.openElements.pop()  # then drop the table itself
+                self.parser.resetInsertionMode()
+            else:
+                # innerHTML case: no table element is in table scope
+                assert self.parser.innerHTML
+                self.parser.parseError()
+
+        def endTagIgnore(self, token):  # the end tag is only reported; the tree is left untouched
+            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+        def endTagOther(self, token):  # report the error, then let "inBody" deal with the end tag
+            self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
+            # Let "inBody" process the tag with tree.insertFromTable set for the duration of the call.
+            self.tree.insertFromTable = True
+            self.parser.phases["inBody"].processEndTag(token)
+            self.tree.insertFromTable = False
+
+    class InTableTextPhase(Phase):  # buffers character tokens, then flushes them and restores originalPhase
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+            self.originalPhase = None  # phase restored when a comment, tag or EOF arrives; assigned by the code that enters this phase
+            self.characterTokens = []  # tokens collected since the last flush
+
+        def flushCharacters(self):  # emit the buffered text and empty the buffer
+            data = "".join([item["data"] for item in self.characterTokens])
+            if any([item not in spaceCharacters for item in data]):  # at least one non-space character
+                token = {"type":tokenTypes["Characters"], "data":data}
+                self.parser.phases["inTable"].insertText(token)
+            elif data:  # only space characters: insert them directly
+                self.tree.insertText(data)
+            self.characterTokens = []
+
+        def processComment(self, token):  # flush, restore the previous phase, hand the token back
+            self.flushCharacters()
+            self.parser.phase = self.originalPhase
+            return token
+
+        def processEOF(self):  # flush, restore the previous phase, return True
+            self.flushCharacters()
+            self.parser.phase = self.originalPhase
+            return True
+
+        def processCharacters(self, token):
+            if token["data"] == u"\u0000":  # a token that is exactly one NUL character is dropped
+                return
+            self.characterTokens.append(token)
+
+        def processSpaceCharacters(self, token):
+            # Not expected to be reached; the token is buffered anyway.
+            self.characterTokens.append(token)
+    #        assert False
+
+        def processStartTag(self, token):  # flush, restore the previous phase, hand the token back
+            self.flushCharacters()
+            self.parser.phase = self.originalPhase
+            return token
+
+        def processEndTag(self, token):  # flush, restore the previous phase, hand the token back
+            self.flushCharacters()
+            self.parser.phase = self.originalPhase
+            return token
+
+
+    class InCaptionPhase(Phase):  # token handling while a caption element is open
+        # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            self.startTagHandler = utils.MethodDispatcher([  # start tag name(s) -> handler
+                ("html", self.startTagHtml),
+                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
+                  "thead", "tr"), self.startTagTableElement)
+            ])
+            self.startTagHandler.default = self.startTagOther
+
+            self.endTagHandler = utils.MethodDispatcher([  # end tag name(s) -> handler
+                ("caption", self.endTagCaption),
+                ("table", self.endTagTable),
+                (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
+                  "thead", "tr"), self.endTagIgnore)
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        def ignoreEndTagCaption(self):  # True when no caption element is in table scope
+            return not self.tree.elementInScope("caption", variant="table")
+
+        def processEOF(self):  # delegated to "inBody"; its return value is not passed on
+            self.parser.phases["inBody"].processEOF()
+
+        def processCharacters(self, token):  # delegated to "inBody"
+            return self.parser.phases["inBody"].processCharacters(token)
+
+        def startTagTableElement(self, token):  # a table-structure start tag implies the caption's end tag
+            self.parser.parseError()
+            # XXX The ignore check must run before processEndTag, which may pop the caption it looks for.
+            ignoreEndTag = self.ignoreEndTagCaption()
+            self.parser.phase.processEndTag(impliedTagToken("caption"))
+            if not ignoreEndTag:
+                return token  # handed back only when the implied end tag was not ignored
+
+        def startTagOther(self, token):  # delegated to "inBody"
+            return self.parser.phases["inBody"].processStartTag(token)
+
+        def endTagCaption(self, token):  # close the caption and return to "inTable"
+            if not self.ignoreEndTagCaption():
+                # AT this code is quite similar to endTagTable in "InTable"
+                self.tree.generateImpliedEndTags()
+                if self.tree.openElements[-1].name != "caption":  # something other than the caption is still open
+                    self.parser.parseError("expected-one-end-tag-but-got-another",
+                      {"gotName": "caption",
+                       "expectedName": self.tree.openElements[-1].name})
+                while self.tree.openElements[-1].name != "caption":  # drop everything above the caption
+                    self.tree.openElements.pop()
+                self.tree.openElements.pop()  # then drop the caption itself
+                self.tree.clearActiveFormattingElements()
+                self.parser.phase = self.parser.phases["inTable"]
+            else:
+                # innerHTML case: no caption element is in table scope
+                assert self.parser.innerHTML
+                self.parser.parseError()
+
+        def endTagTable(self, token):  # an end tag for "table" implies the caption's end tag first
+            self.parser.parseError()
+            ignoreEndTag = self.ignoreEndTagCaption()  # evaluated before the stack is modified
+            self.parser.phase.processEndTag(impliedTagToken("caption"))
+            if not ignoreEndTag:
+                return token  # handed back only when the implied end tag was not ignored
+
+        def endTagIgnore(self, token):  # the end tag is only reported; the tree is left untouched
+            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+        def endTagOther(self, token):  # delegated to "inBody"
+            return self.parser.phases["inBody"].processEndTag(token)
+
+
+    class InColumnGroupPhase(Phase):  # token handling while a colgroup element is open
+        # http://www.whatwg.org/specs/web-apps/current-work/#in-column
+
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            self.startTagHandler = utils.MethodDispatcher([  # start tag name -> handler
+                ("html", self.startTagHtml),
+                ("col", self.startTagCol)
+            ])
+            self.startTagHandler.default = self.startTagOther
+
+            self.endTagHandler = utils.MethodDispatcher([  # end tag name -> handler
+                ("colgroup", self.endTagColgroup),
+                ("col", self.endTagCol)
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        def ignoreEndTagColgroup(self):  # True when the current node is the html element
+            return self.tree.openElements[-1].name == "html"
+
+        def processEOF(self):
+            if self.tree.openElements[-1].name == "html":  # innerHTML case: nothing to close, returns None
+                assert self.parser.innerHTML
+                return
+            else:
+                ignoreEndTag = self.ignoreEndTagColgroup()  # same test as the branch above, so False here
+                self.endTagColgroup(impliedTagToken("colgroup"))
+                if not ignoreEndTag:
+                    return True
+
+        def processCharacters(self, token):  # close the colgroup, then hand the token back
+            ignoreEndTag = self.ignoreEndTagColgroup()  # evaluated before the stack is modified
+            self.endTagColgroup(impliedTagToken("colgroup"))
+            if not ignoreEndTag:
+                return token
+
+        def startTagCol(self, token):  # insert the element and pop it again immediately
+            self.tree.insertElement(token)
+            self.tree.openElements.pop()
+
+        def startTagOther(self, token):  # close the colgroup, then hand the token back
+            ignoreEndTag = self.ignoreEndTagColgroup()  # evaluated before the stack is modified
+            self.endTagColgroup(impliedTagToken("colgroup"))
+            if not ignoreEndTag:
+                return token
+
+        def endTagColgroup(self, token):  # pop the current node and return to "inTable"
+            if self.ignoreEndTagColgroup():
+                # innerHTML case: the current node is the html element
+                assert self.parser.innerHTML
+                self.parser.parseError()
+            else:
+                self.tree.openElements.pop()
+                self.parser.phase = self.parser.phases["inTable"]
+
+        def endTagCol(self, token):  # an end tag for "col" is only reported
+            self.parser.parseError("no-end-tag", {"name": "col"})
+
+        def endTagOther(self, token):  # close the colgroup, then hand the token back
+            ignoreEndTag = self.ignoreEndTagColgroup()  # evaluated before the stack is modified
+            self.endTagColgroup(impliedTagToken("colgroup"))
+            if not ignoreEndTag:
+                return token
+
+
+    class InTableBodyPhase(Phase):  # token handling inside tbody / thead / tfoot
+        # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+            self.startTagHandler = utils.MethodDispatcher([  # start tag name(s) -> handler
+                ("html", self.startTagHtml),
+                ("tr", self.startTagTr),
+                (("td", "th"), self.startTagTableCell),
+                (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
+                 self.startTagTableOther)
+            ])
+            self.startTagHandler.default = self.startTagOther
+
+            self.endTagHandler = utils.MethodDispatcher([  # end tag name(s) -> handler
+                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
+                ("table", self.endTagTable),
+                (("body", "caption", "col", "colgroup", "html", "td", "th",
+                  "tr"), self.endTagIgnore)
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        # helper methods
+        def clearStackToTableBodyContext(self):  # pop until the current node is tbody, tfoot, thead or html
+            while self.tree.openElements[-1].name not in ("tbody", "tfoot",
+              "thead", "html"):
+                #self.parser.parseError("unexpected-implied-end-tag-in-table",
+                #  {"name": self.tree.openElements[-1].name})
+                self.tree.openElements.pop()
+            if self.tree.openElements[-1].name == "html":  # stopping at html is asserted to be the innerHTML case
+                assert self.parser.innerHTML
+
+        # the rest
+        def processEOF(self):  # delegated to "inTable"; its return value is not passed on
+            self.parser.phases["inTable"].processEOF()
+
+        def processSpaceCharacters(self, token):  # delegated to "inTable"
+            return self.parser.phases["inTable"].processSpaceCharacters(token)
+
+        def processCharacters(self, token):  # delegated to "inTable"
+            return self.parser.phases["inTable"].processCharacters(token)
+
+        def startTagTr(self, token):  # insert the row and hand parsing to "inRow"
+            self.clearStackToTableBodyContext()
+            self.tree.insertElement(token)
+            self.parser.phase = self.parser.phases["inRow"]
+
+        def startTagTableCell(self, token):  # a cell without a row: imply the row, then hand the token back
+            self.parser.parseError("unexpected-cell-in-table-body", 
+                                   {"name": token["name"]})
+            self.startTagTr(impliedTagToken("tr", "StartTag"))
+            return token
+
+        def startTagTableOther(self, token):  # close the open row group, then hand the token back
+            # XXX AT Any ideas on how to share this with endTagTable?
+            if (self.tree.elementInScope("tbody", variant="table") or
+                self.tree.elementInScope("thead", variant="table") or
+                self.tree.elementInScope("tfoot", variant="table")):
+                self.clearStackToTableBodyContext()
+                self.endTagTableRowGroup(  # implied end tag named after whatever is now the current node
+                    impliedTagToken(self.tree.openElements[-1].name))
+                return token
+            else:
+                # innerHTML case: no row group is in table scope
+                assert self.parser.innerHTML
+                self.parser.parseError()
+
+        def startTagOther(self, token):  # delegated to "inTable"
+            return self.parser.phases["inTable"].processStartTag(token)
+
+        def endTagTableRowGroup(self, token):  # close the named row group and return to "inTable"
+            if self.tree.elementInScope(token["name"], variant="table"):
+                self.clearStackToTableBodyContext()
+                self.tree.openElements.pop()
+                self.parser.phase = self.parser.phases["inTable"]
+            else:
+                self.parser.parseError("unexpected-end-tag-in-table-body",
+                  {"name": token["name"]})
+
+        def endTagTable(self, token):  # same steps as startTagTableOther, for an end tag
+            if (self.tree.elementInScope("tbody", variant="table") or
+                self.tree.elementInScope("thead", variant="table") or
+                self.tree.elementInScope("tfoot", variant="table")):
+                self.clearStackToTableBodyContext()
+                self.endTagTableRowGroup(  # implied end tag named after whatever is now the current node
+                    impliedTagToken(self.tree.openElements[-1].name))
+                return token
+            else:
+                # innerHTML case: no row group is in table scope
+                assert self.parser.innerHTML
+                self.parser.parseError()
+
+        def endTagIgnore(self, token):  # the end tag is only reported; the tree is left untouched
+            self.parser.parseError("unexpected-end-tag-in-table-body",
+              {"name": token["name"]})
+
+        def endTagOther(self, token):  # delegated to "inTable"
+            return self.parser.phases["inTable"].processEndTag(token)
+
+
+    class InRowPhase(Phase):  # token handling while a tr element is open
+        # http://www.whatwg.org/specs/web-apps/current-work/#in-row
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+            self.startTagHandler = utils.MethodDispatcher([  # start tag name(s) -> handler
+                ("html", self.startTagHtml),
+                (("td", "th"), self.startTagTableCell),
+                (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
+                  "tr"), self.startTagTableOther)
+            ])
+            self.startTagHandler.default = self.startTagOther
+
+            self.endTagHandler = utils.MethodDispatcher([  # end tag name(s) -> handler
+                ("tr", self.endTagTr),
+                ("table", self.endTagTable),
+                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
+                (("body", "caption", "col", "colgroup", "html", "td", "th"),
+                  self.endTagIgnore)
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        # helper methods (XXX unify this with other table helper methods)
+        def clearStackToTableRowContext(self):  # pop until the current node is tr or html, reporting each pop
+            while self.tree.openElements[-1].name not in ("tr", "html"):
+                self.parser.parseError("unexpected-implied-end-tag-in-table-row",
+                  {"name": self.tree.openElements[-1].name})
+                self.tree.openElements.pop()
+
+        def ignoreEndTagTr(self):  # True when no tr element is in table scope
+            return not self.tree.elementInScope("tr", variant="table")
+
+        # the rest
+        def processEOF(self):  # delegated to "inTable"; its return value is not passed on
+            self.parser.phases["inTable"].processEOF()
+
+        def processSpaceCharacters(self, token):
+            return self.parser.phases["inTable"].processSpaceCharacters(token)        
+
+        def processCharacters(self, token):  # delegated to "inTable"
+            return self.parser.phases["inTable"].processCharacters(token)
+
+        def startTagTableCell(self, token):  # insert the cell and hand parsing to "inCell"
+            self.clearStackToTableRowContext()
+            self.tree.insertElement(token)
+            self.parser.phase = self.parser.phases["inCell"]
+            self.tree.activeFormattingElements.append(Marker)  # marker goes in after the cell is inserted
+
+        def startTagTableOther(self, token):  # close the row, then hand the token back
+            ignoreEndTag = self.ignoreEndTagTr()  # evaluated before endTagTr modifies the stack
+            self.endTagTr(impliedTagToken("tr"))
+            # XXX how are we sure it's always ignored in the innerHTML case?
+            if not ignoreEndTag:
+                return token
+
+        def startTagOther(self, token):  # delegated to "inTable"
+            return self.parser.phases["inTable"].processStartTag(token)
+
+        def endTagTr(self, token):  # close the row and return to "inTableBody"
+            if not self.ignoreEndTagTr():
+                self.clearStackToTableRowContext()
+                self.tree.openElements.pop()
+                self.parser.phase = self.parser.phases["inTableBody"]
+            else:
+                # innerHTML case: no tr element is in table scope
+                assert self.parser.innerHTML
+                self.parser.parseError()
+
+        def endTagTable(self, token):  # close the row, then hand the token back
+            ignoreEndTag = self.ignoreEndTagTr()  # evaluated before endTagTr modifies the stack
+            self.endTagTr(impliedTagToken("tr"))
+            # Reprocess the current tag if the tr end tag was not ignored
+            # XXX how are we sure it's always ignored in the innerHTML case?
+            if not ignoreEndTag:
+                return token
+
+        def endTagTableRowGroup(self, token):  # close the row first if the named row group is in table scope
+            if self.tree.elementInScope(token["name"], variant="table"):
+                self.endTagTr(impliedTagToken("tr"))
+                return token
+            else:
+                self.parser.parseError()
+
+        def endTagIgnore(self, token):  # the end tag is only reported; the tree is left untouched
+            self.parser.parseError("unexpected-end-tag-in-table-row",
+                {"name": token["name"]})
+
+        def endTagOther(self, token):  # delegated to "inTable"
+            return self.parser.phases["inTable"].processEndTag(token)
+
+    class InCellPhase(Phase):  # token handling while a td or th element is open
+        # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+            self.startTagHandler = utils.MethodDispatcher([  # start tag name(s) -> handler
+                ("html", self.startTagHtml),
+                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
+                  "thead", "tr"), self.startTagTableOther)
+            ])
+            self.startTagHandler.default = self.startTagOther
+
+            self.endTagHandler = utils.MethodDispatcher([  # end tag name(s) -> handler
+                (("td", "th"), self.endTagTableCell),
+                (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
+                (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        # helper: close whichever cell (td checked first, then th) is in table scope
+        def closeCell(self):
+            if self.tree.elementInScope("td", variant="table"):
+                self.endTagTableCell(impliedTagToken("td"))
+            elif self.tree.elementInScope("th", variant="table"):
+                self.endTagTableCell(impliedTagToken("th"))
+
+        # the rest
+        def processEOF(self):  # delegated to "inBody"; its return value is not passed on
+            self.parser.phases["inBody"].processEOF()
+
+        def processCharacters(self, token):  # delegated to "inBody"
+            return self.parser.phases["inBody"].processCharacters(token)
+
+        def startTagTableOther(self, token):  # close the open cell, then hand the token back
+            if (self.tree.elementInScope("td", variant="table") or
+                self.tree.elementInScope("th", variant="table")):
+                self.closeCell()
+                return token
+            else:
+                # innerHTML case: neither td nor th is in table scope
+                assert self.parser.innerHTML
+                self.parser.parseError()
+
+        def startTagOther(self, token):  # delegated to "inBody"
+            return self.parser.phases["inBody"].processStartTag(token)
+
+        def endTagTableCell(self, token):  # close the named cell and return to "inRow"
+            if self.tree.elementInScope(token["name"], variant="table"):
+                self.tree.generateImpliedEndTags(token["name"])
+                if self.tree.openElements[-1].name != token["name"]:  # other elements are still open above the cell
+                    self.parser.parseError("unexpected-cell-end-tag",
+                      {"name": token["name"]})
+                    while True:  # pop up to and including the cell
+                        node = self.tree.openElements.pop()
+                        if node.name == token["name"]:
+                            break
+                else:
+                    self.tree.openElements.pop()  # the cell is the current node: pop just that
+                self.tree.clearActiveFormattingElements()
+                self.parser.phase = self.parser.phases["inRow"]
+            else:
+                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+        def endTagIgnore(self, token):  # the end tag is only reported; the tree is left untouched
+            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+        def endTagImply(self, token):  # an enclosing table element's end tag closes the cell first
+            if self.tree.elementInScope(token["name"], variant="table"):
+                self.closeCell()
+                return token
+            else:
+                # sometimes innerHTML case
+                self.parser.parseError()
+
+        def endTagOther(self, token):  # delegated to "inBody"
+            return self.parser.phases["inBody"].processEndTag(token)
+
+    class InSelectPhase(Phase):  # token handling while a select element is open
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            self.startTagHandler = utils.MethodDispatcher([  # start tag name(s) -> handler
+                ("html", self.startTagHtml),
+                ("option", self.startTagOption),
+                ("optgroup", self.startTagOptgroup),
+                ("select", self.startTagSelect),
+                (("input", "keygen", "textarea"), self.startTagInput),
+                ("script", self.startTagScript)
+            ])
+            self.startTagHandler.default = self.startTagOther
+
+            self.endTagHandler = utils.MethodDispatcher([  # end tag name -> handler
+                ("option", self.endTagOption),
+                ("optgroup", self.endTagOptgroup),
+                ("select", self.endTagSelect)
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        # http://www.whatwg.org/specs/web-apps/current-work/#in-select
+        def processEOF(self):  # EOF is an error unless the current node is the html element
+            if self.tree.openElements[-1].name != "html":
+                self.parser.parseError("eof-in-select")
+            else:
+                assert self.parser.innerHTML
+
+        def processCharacters(self, token):
+            if token["data"] == u"\u0000":  # a token that is exactly one NUL character is dropped
+                return
+            self.tree.insertText(token["data"])
+
+        def startTagOption(self, token):
+            # We need to imply </option> if <option> is the current node.
+            if self.tree.openElements[-1].name == "option":
+                self.tree.openElements.pop()
+            self.tree.insertElement(token)
+
+        def startTagOptgroup(self, token):  # imply the end of an open option, then of an open optgroup
+            if self.tree.openElements[-1].name == "option":
+                self.tree.openElements.pop()
+            if self.tree.openElements[-1].name == "optgroup":
+                self.tree.openElements.pop()
+            self.tree.insertElement(token)
+
+        def startTagSelect(self, token):  # a nested select start tag is treated as the select's end tag
+            self.parser.parseError("unexpected-select-in-select")
+            self.endTagSelect(impliedTagToken("select"))
+
+        def startTagInput(self, token):  # input / keygen / textarea: close the select, hand the token back
+            self.parser.parseError("unexpected-input-in-select")
+            if self.tree.elementInScope("select", variant="select"):
+                self.endTagSelect(impliedTagToken("select"))
+                return token
+            else:
+                assert self.parser.innerHTML  # no select in select scope is asserted to be the innerHTML case
+
+        def startTagScript(self, token):  # delegated to "inHead"
+            return self.parser.phases["inHead"].processStartTag(token)
+
+        def startTagOther(self, token):  # any other start tag is only reported
+            self.parser.parseError("unexpected-start-tag-in-select",
+              {"name": token["name"]})
+
+        def endTagOption(self, token):  # pops only when option is the current node
+            if self.tree.openElements[-1].name == "option":
+                self.tree.openElements.pop()
+            else:
+                self.parser.parseError("unexpected-end-tag-in-select",
+                  {"name": "option"})
+
+        def endTagOptgroup(self, token):
+            # </optgroup> implicitly closes <option>
+            if (self.tree.openElements[-1].name == "option" and
+                self.tree.openElements[-2].name == "optgroup"):
+                self.tree.openElements.pop()
+            # It also closes </optgroup>
+            if self.tree.openElements[-1].name == "optgroup":
+                self.tree.openElements.pop()
+            # But nothing else
+            else:
+                self.parser.parseError("unexpected-end-tag-in-select",
+                  {"name": "optgroup"})
+
+        def endTagSelect(self, token):  # pop up to and including the select, then reset the insertion mode
+            if self.tree.elementInScope("select", variant="select"):
+                node = self.tree.openElements.pop()
+                while node.name != "select":
+                    node = self.tree.openElements.pop()
+                self.parser.resetInsertionMode()
+            else:
+                # innerHTML case: no select element is in select scope
+                assert self.parser.innerHTML
+                self.parser.parseError()
+
+        def endTagOther(self, token):  # any other end tag is only reported
+            self.parser.parseError("unexpected-end-tag-in-select",
+              {"name": token["name"]})
+
+
+    class InSelectInTablePhase(Phase):  # "inSelect" behaviour plus special handling of table-related tags
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            self.startTagHandler = utils.MethodDispatcher([  # table-related start tags -> startTagTable
+                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
+                 self.startTagTable)
+            ])
+            self.startTagHandler.default = self.startTagOther
+
+            self.endTagHandler = utils.MethodDispatcher([  # table-related end tags -> endTagTable
+                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
+                 self.endTagTable)
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        def processEOF(self):  # delegated to "inSelect"; its return value is not passed on
+            self.parser.phases["inSelect"].processEOF()
+
+        def processCharacters(self, token):  # delegated to "inSelect"
+            return self.parser.phases["inSelect"].processCharacters(token)
+
+        def startTagTable(self, token):  # report, close the select via an implied end tag, hand the token back
+            self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]})
+            self.endTagOther(impliedTagToken("select"))
+            return token
+
+        def startTagOther(self, token):  # delegated to "inSelect"
+            return self.parser.phases["inSelect"].processStartTag(token)
+
+        def endTagTable(self, token):  # always reported; acted on only if the named element is in table scope
+            self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
+            if self.tree.elementInScope(token["name"], variant="table"):
+                self.endTagOther(impliedTagToken("select"))
+                return token
+
+        def endTagOther(self, token):  # delegated to "inSelect"
+            return self.parser.phases["inSelect"].processEndTag(token)
+
+
+    class InForeignContentPhase(Phase):  # token handling for foreign (MathML / SVG namespace) content
+        breakoutElements = frozenset(["b", "big", "blockquote", "body", "br", 
+                                      "center", "code", "dd", "div", "dl", "dt",
+                                      "em", "embed", "h1", "h2", "h3", 
+                                      "h4", "h5", "h6", "head", "hr", "i", "img",
+                                      "li", "listing", "menu", "meta", "nobr", 
+                                      "ol", "p", "pre", "ruby", "s",  "small", 
+                                      "span", "strong", "strike",  "sub", "sup", 
+                                      "table", "tt", "u", "ul", "var"])  # start tags that make processStartTag leave foreign content
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+        def adjustSVGTagNames(self, token):  # rewrite a lowercased SVG tag name to its camelCase form, in place
+            replacements = {u"altglyph":u"altGlyph",
+                            u"altglyphdef":u"altGlyphDef",
+                            u"altglyphitem":u"altGlyphItem",
+                            u"animatecolor":u"animateColor",
+                            u"animatemotion":u"animateMotion",
+                            u"animatetransform":u"animateTransform",
+                            u"clippath":u"clipPath",
+                            u"feblend":u"feBlend",
+                            u"fecolormatrix":u"feColorMatrix",
+                            u"fecomponenttransfer":u"feComponentTransfer",
+                            u"fecomposite":u"feComposite",
+                            u"feconvolvematrix":u"feConvolveMatrix",
+                            u"fediffuselighting":u"feDiffuseLighting",
+                            u"fedisplacementmap":u"feDisplacementMap",
+                            u"fedistantlight":u"feDistantLight",
+                            u"feflood":u"feFlood",
+                            u"fefunca":u"feFuncA",
+                            u"fefuncb":u"feFuncB",
+                            u"fefuncg":u"feFuncG",
+                            u"fefuncr":u"feFuncR",
+                            u"fegaussianblur":u"feGaussianBlur",
+                            u"feimage":u"feImage",
+                            u"femerge":u"feMerge",
+                            u"femergenode":u"feMergeNode",
+                            u"femorphology":u"feMorphology",
+                            u"feoffset":u"feOffset",
+                            u"fepointlight":u"fePointLight",
+                            u"fespecularlighting":u"feSpecularLighting",
+                            u"fespotlight":u"feSpotLight",
+                            u"fetile":u"feTile",
+                            u"feturbulence":u"feTurbulence",
+                            u"foreignobject":u"foreignObject",
+                            u"glyphref":u"glyphRef",
+                            u"lineargradient":u"linearGradient",
+                            u"radialgradient":u"radialGradient",
+                            u"textpath":u"textPath"}
+
+            if token["name"] in replacements:  # names without an entry are left unchanged
+                token["name"] = replacements[token["name"]]
+
+        def processCharacters(self, token):
+            if token["data"] == u"\u0000":  # a lone NUL is replaced by U+FFFD rather than dropped
+                token["data"] = u"\uFFFD"
+            elif (self.parser.framesetOK and 
+                  any(char not in spaceCharacters for char in token["data"])):
+                self.parser.framesetOK = False  # any non-space character clears the flag
+            Phase.processCharacters(self, token)
+
+        def processStartTag(self, token):  # returns the token when it has to be handled outside foreign content
+            currentNode = self.tree.openElements[-1]
+            if (token["name"] in self.breakoutElements or
+                (token["name"] == "font" and
+                 set(token["data"].keys()) & set(["color", "face", "size"]))):
+                self.parser.parseError("unexpected-html-element-in-foreign-content",
+                                       {"name": token["name"]})  # data is a mapping, as in every other parseError call
+                while (self.tree.openElements[-1].namespace !=
+                       self.tree.defaultNamespace and 
+                       not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
+                       not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
+                    self.tree.openElements.pop()  # pop foreign elements until a default-namespace node or integration point
+                return token
+
+            else:
+                if currentNode.namespace == namespaces["mathml"]:
+                    self.parser.adjustMathMLAttributes(token)
+                elif currentNode.namespace == namespaces["svg"]:
+                    self.adjustSVGTagNames(token)
+                    self.parser.adjustSVGAttributes(token)
+                self.parser.adjustForeignAttributes(token)
+                token["namespace"] = currentNode.namespace  # the new element takes the current node's namespace
+                self.tree.insertElement(token)
+                if token["selfClosing"]:
+                    self.tree.openElements.pop()
+                    token["selfClosingAcknowledged"] = True
+
+        def processEndTag(self, token):  # returns None, or whatever the current phase returns for the token
+            nodeIndex = len(self.tree.openElements) - 1
+            node = self.tree.openElements[-1]
+            if node.name.translate(asciiUpper2Lower) != token["name"]:  # translated first, matching the loop's comparison
+                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
+
+            while True:  # walk down the stack from the current node
+                if node.name.translate(asciiUpper2Lower) == token["name"]:
+                    #XXX this isn't in the spec but it seems necessary
+                    if self.parser.phase == self.parser.phases["inTableText"]:
+                        self.parser.phase.flushCharacters()
+                        self.parser.phase = self.parser.phase.originalPhase
+                    while self.tree.openElements.pop() != node:  # pop up to and including the matching node
+                        assert self.tree.openElements
+                    new_token = None
+                    break
+                nodeIndex -= 1
+
+                node = self.tree.openElements[nodeIndex]
+                if node.namespace != self.tree.defaultNamespace:  # still a foreign element: keep searching
+                    continue
+                else:
+                    new_token = self.parser.phase.processEndTag(token)  # default-namespace node: defer to the current phase
+                    break
+            return new_token
+
+
+    class AfterBodyPhase(Phase):  # mapped to the "afterBody" key in the phases dict returned at the end of this scope
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            self.startTagHandler = utils.MethodDispatcher([
+                    ("html", self.startTagHtml)
+                    ])
+            self.startTagHandler.default = self.startTagOther  # presumably used for tag names without an explicit entry -- confirm in utils
+
+            self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)])
+            self.endTagHandler.default = self.endTagOther
+
+        def processEOF(self):
+            #Stop parsing
+            pass
+
+        def processComment(self, token):
+            # This is needed because data is to be appended to the <html> element
+            # here and not to whatever is currently open.
+            self.tree.insertComment(token, self.tree.openElements[0])
+
+        def processCharacters(self, token):  # report the error, switch the parser to "inBody", hand the token back
+            self.parser.parseError("unexpected-char-after-body")
+            self.parser.phase = self.parser.phases["inBody"]
+            return token  # presumably reprocessed by the caller in the new phase -- confirm
+
+        def startTagHtml(self, token):  # <html> is delegated to the "inBody" phase
+            return self.parser.phases["inBody"].processStartTag(token)
+
+        def startTagOther(self, token):  # any other start tag: same recovery as processCharacters
+            self.parser.parseError("unexpected-start-tag-after-body",
+              {"name": token["name"]})
+            self.parser.phase = self.parser.phases["inBody"]
+            return token
+
+        def endTagHtml(self,name):  # NOTE(review): `name` is unused; sibling handlers call this argument `token`
+            if self.parser.innerHTML:
+                self.parser.parseError("unexpected-end-tag-after-body-innerhtml")
+            else:
+                self.parser.phase = self.parser.phases["afterAfterBody"]
+
+        def endTagOther(self, token):  # any other end tag: report, switch to "inBody", hand the token back
+            self.parser.parseError("unexpected-end-tag-after-body",
+              {"name": token["name"]})
+            self.parser.phase = self.parser.phases["inBody"]
+            return token
+
+    class InFramesetPhase(Phase):  # mapped to the "inFrameset" key in the phases dict returned at the end of this scope
+        # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            self.startTagHandler = utils.MethodDispatcher([
+                ("html", self.startTagHtml),
+                ("frameset", self.startTagFrameset),
+                ("frame", self.startTagFrame),
+                ("noframes", self.startTagNoframes)
+            ])
+            self.startTagHandler.default = self.startTagOther  # startTagHtml is not defined here; presumably inherited from Phase
+
+            self.endTagHandler = utils.MethodDispatcher([
+                ("frameset", self.endTagFrameset)
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        def processEOF(self):  # EOF is an error unless the current node is <html>
+            if self.tree.openElements[-1].name != "html":
+                self.parser.parseError("eof-in-frameset")
+            else:
+                assert self.parser.innerHTML
+
+        def processCharacters(self, token):  # only reports the error; nothing is returned
+            self.parser.parseError("unexpected-char-in-frameset")
+
+        def startTagFrameset(self, token):
+            self.tree.insertElement(token)
+
+        def startTagFrame(self, token):  # insert <frame> and take it off the open-element stack again at once
+            self.tree.insertElement(token)
+            self.tree.openElements.pop()
+
+        def startTagNoframes(self, token):  # <noframes> is delegated to the "inBody" phase
+            return self.parser.phases["inBody"].processStartTag(token)
+
+        def startTagOther(self, token):  # only reports the error; nothing is returned
+            self.parser.parseError("unexpected-start-tag-in-frameset",
+              {"name": token["name"]})
+
+        def endTagFrameset(self, token):
+            if self.tree.openElements[-1].name == "html":
+                # innerHTML case
+                self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
+            else:
+                self.tree.openElements.pop()
+            if (not self.parser.innerHTML and
+                self.tree.openElements[-1].name != "frameset"):
+                # If we're not in innerHTML mode and the current node is not a
+                # "frameset" element (anymore) then switch.
+                self.parser.phase = self.parser.phases["afterFrameset"]
+
+        def endTagOther(self, token):  # only reports the error; nothing is returned
+            self.parser.parseError("unexpected-end-tag-in-frameset",
+              {"name": token["name"]})
+
+
+    class AfterFramesetPhase(Phase):  # mapped to the "afterFrameset" key in the phases dict returned at the end of this scope
+        # http://www.whatwg.org/specs/web-apps/current-work/#after3
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            self.startTagHandler = utils.MethodDispatcher([
+                ("html", self.startTagHtml),
+                ("noframes", self.startTagNoframes)
+            ])
+            self.startTagHandler.default = self.startTagOther  # startTagHtml is not defined here; presumably inherited from Phase
+
+            self.endTagHandler = utils.MethodDispatcher([
+                ("html", self.endTagHtml)
+            ])
+            self.endTagHandler.default = self.endTagOther
+
+        def processEOF(self):
+            #Stop parsing
+            pass
+
+        def processCharacters(self, token):  # only reports the error; nothing is returned
+            self.parser.parseError("unexpected-char-after-frameset")
+
+        def startTagNoframes(self, token):  # <noframes> is delegated to the "inHead" phase
+            return self.parser.phases["inHead"].processStartTag(token)
+
+        def startTagOther(self, token):  # only reports the error; nothing is returned
+            self.parser.parseError("unexpected-start-tag-after-frameset",
+              {"name": token["name"]})
+
+        def endTagHtml(self, token):  # </html> moves the parser on to "afterAfterFrameset"
+            self.parser.phase = self.parser.phases["afterAfterFrameset"]
+
+        def endTagOther(self, token):  # only reports the error; nothing is returned
+            self.parser.parseError("unexpected-end-tag-after-frameset",
+              {"name": token["name"]})
+
+
+    class AfterAfterBodyPhase(Phase):  # mapped to the "afterAfterBody" key in the phases dict returned at the end of this scope
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            self.startTagHandler = utils.MethodDispatcher([
+                ("html", self.startTagHtml)
+            ])
+            self.startTagHandler.default = self.startTagOther  # no endTagHandler is built: processEndTag is overridden below
+
+        def processEOF(self):
+            pass
+
+        def processComment(self, token):  # the comment is attached to the document node itself
+            self.tree.insertComment(token, self.tree.document)
+
+        def processSpaceCharacters(self, token):  # delegated to the "inBody" phase
+            return self.parser.phases["inBody"].processSpaceCharacters(token)
+
+        def processCharacters(self, token):  # report the error, switch the parser to "inBody", hand the token back
+            self.parser.parseError("expected-eof-but-got-char")
+            self.parser.phase = self.parser.phases["inBody"]
+            return token  # presumably reprocessed by the caller in the new phase -- confirm
+
+        def startTagHtml(self, token):  # <html> is delegated to the "inBody" phase
+            return self.parser.phases["inBody"].processStartTag(token)
+
+        def startTagOther(self, token):  # same recovery as processCharacters
+            self.parser.parseError("expected-eof-but-got-start-tag",
+              {"name": token["name"]})
+            self.parser.phase = self.parser.phases["inBody"]
+            return token
+
+        def processEndTag(self, token):  # every end tag: report, switch to "inBody", hand the token back
+            self.parser.parseError("expected-eof-but-got-end-tag",
+              {"name": token["name"]})
+            self.parser.phase = self.parser.phases["inBody"]
+            return token
+
+    class AfterAfterFramesetPhase(Phase):  # mapped to the "afterAfterFrameset" key in the phases dict returned at the end of this scope
+        def __init__(self, parser, tree):
+            Phase.__init__(self, parser, tree)
+
+            self.startTagHandler = utils.MethodDispatcher([
+                ("html", self.startTagHtml),
+                ("noframes", self.startTagNoFrames)
+            ])
+            self.startTagHandler.default = self.startTagOther  # no endTagHandler is built: processEndTag is overridden below
+
+        def processEOF(self):
+            pass
+
+        def processComment(self, token):  # the comment is attached to the document node itself
+            self.tree.insertComment(token, self.tree.document)
+
+        def processSpaceCharacters(self, token):  # delegated to the "inBody" phase
+            return self.parser.phases["inBody"].processSpaceCharacters(token)
+
+        def processCharacters(self, token):  # only reports the error; the phase is left unchanged
+            self.parser.parseError("expected-eof-but-got-char")
+
+        def startTagHtml(self, token):  # <html> is delegated to the "inBody" phase
+            return self.parser.phases["inBody"].processStartTag(token)
+
+        def startTagNoFrames(self, token):  # <noframes> is delegated to the "inHead" phase
+            return self.parser.phases["inHead"].processStartTag(token)
+
+        def startTagOther(self, token):  # only reports the error; the phase is left unchanged
+            self.parser.parseError("expected-eof-but-got-start-tag",
+              {"name": token["name"]})
+
+        def processEndTag(self, token):  # every end tag: only reports the error
+            self.parser.parseError("expected-eof-but-got-end-tag",
+              {"name": token["name"]})
+
+
+    return {
+        "initial": InitialPhase,
+        "beforeHtml": BeforeHtmlPhase,
+        "beforeHead": BeforeHeadPhase,
+        "inHead": InHeadPhase,
+        # XXX "inHeadNoscript": InHeadNoScriptPhase,
+        "afterHead": AfterHeadPhase,
+        "inBody": InBodyPhase,
+        "text": TextPhase,
+        "inTable": InTablePhase,
+        "inTableText": InTableTextPhase,
+        "inCaption": InCaptionPhase,
+        "inColumnGroup": InColumnGroupPhase,
+        "inTableBody": InTableBodyPhase,
+        "inRow": InRowPhase,
+        "inCell": InCellPhase,
+        "inSelect": InSelectPhase,
+        "inSelectInTable": InSelectInTablePhase,
+        "inForeignContent": InForeignContentPhase,
+        "afterBody": AfterBodyPhase,
+        "inFrameset": InFramesetPhase,
+        "afterFrameset": AfterFramesetPhase,
+        "afterAfterBody": AfterAfterBodyPhase,
+        "afterAfterFrameset": AfterAfterFramesetPhase,
+        # XXX after after frameset
+        }
+
+def impliedTagToken(name, type="EndTag", attributes = None,
+                    selfClosing = False):
+    """Return a token dict for `name`; `type` is looked up in tokenTypes, attributes defaults to a new dict."""
+    data = {} if attributes is None else attributes
+    return dict(type=tokenTypes[type], name=unicode(name), data=data,
+                selfClosing=selfClosing)
+
+class ParseError(Exception):
+    """Exception type representing an error in the parsed document."""
+    pass
diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
new file mode 100644
index 00000000..dd785639
--- /dev/null
+++ b/html5lib/ihatexml.py
@@ -0,0 +1,177 @@
+import re
+
+baseChar = """[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | 
[#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
+
+ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
+
+combiningCharacter = """[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A"""
+
+digit = """[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
+
+extender = """#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
+
+letter = " | ".join([baseChar, ideographic])
+
+#Without the ":" (XML 1.0's NameChar and Name-start productions also allow a colon)
+name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, 
+                       extender])
+nameFirst = " | ".join([letter, "_"])
+
+reChar = re.compile(r"#x([\d|A-F]{4,4})")
+reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
+
+def charStringToList(chars):
+    """Parse " | "-separated #xNNNN, [#xNNNN-#xNNNN] or single-character entries into normalised ranges."""
+    ranges = []
+    for entry in chars.split(" | "):
+        entry = entry.strip()
+        for pattern in (reChar, reCharRange):
+            found = pattern.match(entry)
+            if found is not None:
+                bounds = [hexToInt(group) for group in found.groups()]
+                if len(bounds) == 1:
+                    # A lone code point is stored as the range [n, n].
+                    bounds = bounds * 2
+                ranges.append(bounds)
+                break
+        else:
+            # Neither pattern matched: the entry must be one literal character.
+            assert len(entry) == 1
+            ranges.append([ord(entry)] * 2)
+    return normaliseCharList(ranges)
+
+def normaliseCharList(charList):
+    """Return the [low, high] ranges of charList sorted, with overlapping or adjacent ranges merged."""
+    charList = sorted(charList)
+    for item in charList:
+        assert item[1] >= item[0]
+    rv = []
+    for low, high in charList:
+        if rv and low <= rv[-1][1] + 1:
+            # max(): a range nested inside the previous one must not shrink it.
+            rv[-1][1] = max(rv[-1][1], high)
+        else:
+            # Append a copy so the caller's inner lists are never mutated.
+            rv.append([low, high])
+    return rv
+
+#We don't really support characters above the BMP :(
+max_unicode = int("FFFF", 16)
+
+def missingRanges(charList):
+    """Return the [low, high] gaps that the sorted, merged ranges in charList leave in 0..max_unicode."""
+    rv = []
+    nextLow = 0  # lowest code point not covered by any range seen so far
+    for low, high in charList:
+        if low > nextLow:  # replaces `charList[0] != 0`, a list-vs-int test that was always true
+            rv.append([nextLow, low - 1])
+        nextLow = max(nextLow, high + 1)
+    return rv + ([[nextLow, max_unicode]] if nextLow <= max_unicode else [])
+
+def listToRegexpStr(charList):
+    """Render [low, high] code point pairs as a single regexp character class."""
+    pieces = []
+    for pair in charList:
+        low = escapeRegexp(unichr(pair[0]))
+        # A one-character range is emitted as just that character.
+        same = pair[0] == pair[1]
+        pieces.append(low if same else low + "-" + escapeRegexp(unichr(pair[1])))
+    return "[%s]" % "".join(pieces)
+
+def hexToInt(hex_str):
+    return int(hex_str, base=16)  # e.g. "0041" -> 65
+
+def escapeRegexp(string):
+    """Return string with each regexp special character (and "-") prefixed by a backslash."""
+    specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
+                          "[", "]", "|", "(", ")", "-")
+    for char in specialCharacters:
+        # The leftover debugging print that followed this replace has been
+        # removed: it wrote to stdout whenever a character was escaped.
+        string = string.replace(char, "\\" + char)
+    return string
+
+#output from the above
+nonXmlNameBMPRegexp = re.compile(u'[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\
u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
+
+nonXmlNameFirstBMPRegexp = re.compile(u'[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\
u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
+
+class InfosetFilter(object):  # coerces names, comments and text (coerce* methods); name escapes are undone by fromXmlName
+    replacementRegexp = re.compile(r"U[\dA-F]{5,5}")  # matches the "U" + 5 hex digit escapes written by escapeChar
+    def __init__(self, replaceChars = None,  
+                 dropXmlnsLocalName = False, 
+                 dropXmlnsAttrNs = False,
+                 preventDoubleDashComments = False,
+                 preventDashAtCommentEnd = False,
+                 replaceFormFeedCharacters = True):
+        # NOTE(review): replaceChars is accepted but never read anywhere in this class.
+        self.dropXmlnsLocalName = dropXmlnsLocalName
+        self.dropXmlnsAttrNs = dropXmlnsAttrNs
+
+        self.preventDoubleDashComments = preventDoubleDashComments
+        self.preventDashAtCommentEnd = preventDashAtCommentEnd  # NOTE(review): stored, but no method in this class reads it
+
+        self.replaceFormFeedCharacters = replaceFormFeedCharacters
+
+        self.replaceCache = {}  # char -> replacement string, filled in by escapeChar
+
+    def coerceAttribute(self, name, namespace=None):  # None for the xmlns cases selected by the drop* flags, else the coerced name
+        if self.dropXmlnsLocalName and name.startswith("xmlns:"):
+            #Need a datalosswarning here
+            return None
+        elif (self.dropXmlnsAttrNs and 
+              namespace == "http://www.w3.org/2000/xmlns/"):
+            return None
+        else:
+            return self.toXmlName(name)
+
+    def coerceElement(self, name, namespace=None):  # namespace is accepted but ignored
+        return self.toXmlName(name)
+
+    def coerceComment(self, data):
+        if self.preventDoubleDashComments:
+            while "--" in data:  # repeated until no "--" remains ("---" needs two passes)
+                data = data.replace("--", "- -")
+        return data
+    
+    def coerceCharacters(self, data):
+        if self.replaceFormFeedCharacters:
+            data = data.replace("\x0C", " ")  # form feed -> space
+        #Other non-xml characters
+        return data
+
+    def toXmlName(self, name):  # NOTE(review): name[0] raises IndexError for an empty name
+        nameFirst = name[0]
+        nameRest = name[1:]
+        m = nonXmlNameFirstBMPRegexp.match(nameFirst)  # the first character is checked against its own regexp
+        if m:
+            nameFirstOutput = self.getReplacementCharacter(nameFirst)
+        else:
+            nameFirstOutput = nameFirst
+
+        nameRestOutput = nameRest
+        replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))  # distinct characters of the tail matched by nonXmlNameBMPRegexp
+        for char in replaceChars:
+            replacement = self.getReplacementCharacter(char)
+            nameRestOutput = nameRestOutput.replace(char, replacement)
+        return nameFirstOutput + nameRestOutput
+    
+    def getReplacementCharacter(self, char):  # cached replacement if present, otherwise escapeChar (which fills the cache)
+        if char in self.replaceCache:
+            replacement = self.replaceCache[char]
+        else:
+            replacement = self.escapeChar(char)
+        return replacement
+
+    def fromXmlName(self, name):  # replaces every substring matching replacementRegexp by its unescaped character
+        for item in set(self.replacementRegexp.findall(name)):
+            name = name.replace(item, self.unescapeChar(item))
+        return name
+
+    def escapeChar(self, char):  # "U" + upper-case hex of ord(char), left-padded with "0" to 5 digits
+        replacement = "U" + hex(ord(char))[2:].upper().rjust(5, "0")
+        self.replaceCache[char] = replacement
+        return replacement
+
+    def unescapeChar(self, charcode):  # inverse of escapeChar: drop the leading "U", parse the rest as hex
+        return unichr(int(charcode[1:], 16))
diff --git a/html5lib/inputstream.py b/html5lib/inputstream.py
new file mode 100644
index 00000000..edec1329
--- /dev/null
+++ b/html5lib/inputstream.py
@@ -0,0 +1,782 @@
+import codecs
+import re
+import types
+import sys
+
+from constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
+from constants import encodings, ReparseException
+import utils
+
+#Non-unicode versions of constants for use in the pre-parser
+spaceCharactersBytes = frozenset([str(item) for item in spaceCharacters])
+asciiLettersBytes = frozenset([str(item) for item in asciiLetters])
+asciiUppercaseBytes = frozenset([str(item) for item in asciiUppercase])
+spacesAngleBrackets = spaceCharactersBytes | frozenset([">", "<"])
+
+invalid_unicode_re = re.compile(u"[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uD800-\uDFFF\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]")
+
+non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
+                                  0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
+                                  0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
+                                  0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
+                                  0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
+                                  0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
+                                  0x10FFFE, 0x10FFFF])
+
+ascii_punctuation_re = re.compile(ur"[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005B-\u0060\u007B-\u007E]")
+
+# Cache for charsUntil()
+charsUntilRegEx = {}
+        
+class BufferedStream:
+    """Buffering for streams that do not have buffering of their own
+
+    The buffer is implemented as a list of chunks on the assumption that
+    joining many strings will be slow since it is O(n**2)
+    """
+
+    def __init__(self, stream):
+        self.stream = stream
+        self.buffer = []
+        self.position = [-1,0] #chunk number, offset
+
+    def tell(self):
+        """Return the read position as an offset into the buffered data."""
+        pos = sum([len(chunk) for chunk in self.buffer[:self.position[0]]])
+        return pos + self.position[1]
+
+    def seek(self, pos):
+        """Move the read position to pos, which must already be buffered."""
+        assert pos <= self._bufferedBytes()
+        offset = pos
+        i = 0
+        while len(self.buffer[i]) < offset:
+            offset -= len(self.buffer[i]) # step over this whole chunk
+            i += 1
+        self.position = [i, offset]
+
+    def read(self, bytes):
+        """Return up to bytes of data, replaying buffered data first."""
+        if not self.buffer:
+            return self._readStream(bytes)
+        elif (self.position[0] == len(self.buffer) and
+              self.position[1] == len(self.buffer[-1])):
+            return self._readStream(bytes)
+        else:
+            return self._readFromBuffer(bytes)
+
+    def _bufferedBytes(self):
+        return sum([len(item) for item in self.buffer])
+
+    def _readStream(self, bytes):
+        # Read fresh data, remember it, and park the position at its end
+        data = self.stream.read(bytes)
+        self.buffer.append(data)
+        self.position[0] += 1
+        self.position[1] = len(data)
+        return data
+
+    def _readFromBuffer(self, bytes):
+        remainingBytes = bytes
+        rv = []
+        bufferIndex = self.position[0]
+        bufferOffset = self.position[1]
+        while bufferIndex < len(self.buffer) and remainingBytes != 0:
+            assert remainingBytes > 0
+            bufferedData = self.buffer[bufferIndex]
+            if remainingBytes <= len(bufferedData) - bufferOffset:
+                bytesToRead = remainingBytes
+                self.position = [bufferIndex, bufferOffset + bytesToRead]
+            else:
+                bytesToRead = len(bufferedData) - bufferOffset
+                self.position = [bufferIndex, len(bufferedData)]
+                bufferIndex += 1
+            rv.append(bufferedData[bufferOffset:
+                                   bufferOffset + bytesToRead])
+            remainingBytes -= bytesToRead
+            bufferOffset = 0 # every later chunk is read from its start
+
+        # Buffered data ran out before the request was satisfied
+        if remainingBytes:
+            rv.append(self._readStream(remainingBytes))
+
+        return "".join(rv)
+        
+
+
+class HTMLInputStream:
+    """Provides a unicode stream of characters to the HTMLTokenizer.
+
+    This class takes care of character encoding and removing or replacing
+    incorrect byte-sequences and also provides column and line tracking.
+
+    """
+
+    _defaultChunkSize = 10240
+
+    def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
+        """Initialises the HTMLInputStream.
+
+        HTMLInputStream(source, [encoding]) -> Normalized stream from source
+        for use by html5lib.
+
+        source can be either a file-object or a string holding the document.
+
+        The optional encoding parameter must be a string that indicates
+        the encoding.  If specified, that encoding will be used,
+        regardless of any BOM or later declaration (such as in a meta
+        element)
+
+        parseMeta - Look for a <meta> element containing encoding information
+        chardet - Fall back to the chardet package if it can be imported
+        """
+
+        #A wide (UCS4) build stores U+10FFFF as one code unit, a narrow (UCS2) build as two
+        if len(u"\U0010FFFF") == 1:
+            self.reportCharacterErrors = self.characterErrorsUCS4
+            self.replaceCharactersRegexp = re.compile(u"[\uD800-\uDFFF]")
+        else:
+            self.reportCharacterErrors = self.characterErrorsUCS2
+            self.replaceCharactersRegexp = re.compile(u"([\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF])")
+
+        # List of where new lines occur
+        self.newLines = [0]
+
+        self.charEncoding = (codecName(encoding), "certain")
+
+        # Raw Stream - for unicode objects this will encode to utf-8 and set
+        #              self.charEncoding as appropriate
+        self.rawStream = self.openStream(source)
+
+        # Encoding Information
+        #Number of bytes to use when looking for a meta element with
+        #encoding information
+        self.numBytesMeta = 512
+        #Number of bytes handed to chardet per read while detecting the encoding
+        self.numBytesChardet = 100
+        #Encoding to use if no other information can be found
+        self.defaultEncoding = "windows-1252"
+        
+        #Detect encoding iff no explicit "transport level" encoding is supplied
+        if (self.charEncoding[0] is None):
+            self.charEncoding = self.detectEncoding(parseMeta, chardet)
+
+
+        self.reset()
+
+    def reset(self):
+        self.dataStream = codecs.getreader(self.charEncoding[0])(self.rawStream,
+                                                                 'replace')
+
+        self.chunk = u""
+        self.chunkSize = 0
+        self.chunkOffset = 0
+        self.errors = []
+
+        # number of (complete) lines in previous chunks
+        self.prevNumLines = 0
+        # number of columns in the last line of the previous chunk
+        self.prevNumCols = 0
+        
+        #Deal with CR LF and surrogates split over chunk boundaries
+        self._bufferedCharacter = None
+
+    def openStream(self, source):
+        """Produces a file object from source.
+
+        source can be either a file object or a string holding the document.
+
+        """
+        # Already a file object
+        if hasattr(source, 'read'):
+            stream = source
+        else:
+            # Otherwise treat source as a string and convert to a file object
+            if isinstance(source, unicode):
+                source = source.encode('utf-8')
+                self.charEncoding = ("utf-8", "certain")
+            try:
+                from io import BytesIO
+            except ImportError: # only a missing io module should trigger the fallback
+                # 2to3 converts this line to: from io import StringIO  
+                from cStringIO import StringIO as BytesIO
+            stream = BytesIO(source)
+
+        # Encoding detection rewinds the raw stream, so it must support seek/tell
+        if (not(hasattr(stream, "tell") and hasattr(stream, "seek")) or
+            stream is sys.stdin):
+            stream = BufferedStream(stream)
+        return stream
+
+    def detectEncoding(self, parseMeta=True, chardet=True):
+        """Return (encoding, confidence): tried in order are a BOM, a <meta>
+        declaration (if parseMeta), chardet (if requested and importable) and
+        self.defaultEncoding; only a BOM is reported as "certain"."""
+        #First look for a BOM
+        #This will also read past the BOM if present
+        encoding = self.detectBOM()
+        confidence = "certain"
+        #If there is no BOM need to look for meta elements with encoding 
+        #information
+        if encoding is None and parseMeta:
+            encoding = self.detectEncodingMeta()
+            confidence = "tentative"
+        #Guess with chardet, if available
+        if encoding is None and chardet:
+            confidence = "tentative"
+            try:
+                from chardet.universaldetector import UniversalDetector
+                detector = UniversalDetector()
+                while not detector.done:
+                    chunk = self.rawStream.read(self.numBytesChardet)
+                    if not chunk:
+                        break
+                    detector.feed(chunk)
+                detector.close()
+                encoding = detector.result['encoding']
+                self.rawStream.seek(0)
+            except ImportError:
+                pass
+        # If all else fails use the default encoding
+        if encoding is None:
+            confidence="tentative"
+            encoding = self.defaultEncoding
+
+        #Substitute for equivalent encodings:
+        encodingSub = {"iso-8859-1":"windows-1252"}
+        if encoding.lower() in encodingSub:
+            encoding = encodingSub[encoding.lower()]
+
+        return encoding, confidence
+
+    def changeEncoding(self, newEncoding):
+        """Adopt newEncoding as certain; if it differs from the encoding in
+        use, rewind the raw stream and raise ReparseException."""
+        newEncoding = codecName(newEncoding)
+        if newEncoding in ("utf-16", "utf-16-be", "utf-16-le"):
+            newEncoding = "utf-8"
+        if newEncoding is None:
+            return
+        oldEncoding, self.charEncoding = self.charEncoding[0], (newEncoding, "certain")
+        if oldEncoding != newEncoding:
+            self.rawStream.seek(0)
+            self.reset() # charEncoding is already updated: reset() builds the decoder from it
+            raise ReparseException("Encoding changed from %s to %s" % (oldEncoding, newEncoding))
+            
+    def detectBOM(self):
+        """Attempts to detect a BOM at the start of the stream. If
+        an encoding can be determined from the BOM return the name of the
+        encoding otherwise return None"""
+        bomDict = {
+            codecs.BOM_UTF8: 'utf-8',
+            codecs.BOM_UTF16_LE: 'utf-16-le', codecs.BOM_UTF16_BE: 'utf-16-be',
+            codecs.BOM_UTF32_LE: 'utf-32-le', codecs.BOM_UTF32_BE: 'utf-32-be'
+        }
+
+        # Read 4 bytes from the current position (no seek is done first; assumes the stream is at its start)
+        string = self.rawStream.read(4)
+
+        # Try detecting the BOM using bytes from the string
+        encoding = bomDict.get(string[:3])         # UTF-8
+        seek = 3
+        if not encoding:
+            # Need to detect UTF-32 before UTF-16
+            encoding = bomDict.get(string)         # UTF-32
+            seek = 4
+            if not encoding:
+                encoding = bomDict.get(string[:2]) # UTF-16
+                seek = 2
+
+        # Set the read position past the BOM if one was found, otherwise
+        # set it to the start of the stream
+        self.rawStream.seek(encoding and seek or 0) # and/or conditional; safe because seek is never 0 here
+
+        return encoding
+
+    def detectEncodingMeta(self):
+        """Report the encoding declared by the meta element
+        """
+        buffer = self.rawStream.read(self.numBytesMeta)
+        parser = EncodingParser(buffer)
+        self.rawStream.seek(0)
+        encoding = parser.getEncoding()
+        
+        if encoding in ("utf-16", "utf-16-be", "utf-16-le"):
+            encoding = "utf-8"
+
+        return encoding
+
+    def _position(self, offset):
+        chunk = self.chunk
+        nLines = chunk.count(u'\n', 0, offset)
+        positionLine = self.prevNumLines + nLines
+        lastLinePos = chunk.rfind(u'\n', 0, offset)
+        if lastLinePos == -1:
+            positionColumn = self.prevNumCols + offset
+        else:
+            positionColumn = offset - (lastLinePos + 1)
+        return (positionLine, positionColumn)
+
+    def position(self):
+        """Returns (line, col) of the current position in the stream."""
+        line, col = self._position(self.chunkOffset)
+        return (line+1, col)
+
+    def char(self):
+        """ Read one character from the stream or queue if available. Return
+            EOF when EOF is reached.
+        """
+        # Read a new chunk from the input stream if necessary
+        if self.chunkOffset >= self.chunkSize:
+            if not self.readChunk():
+                return EOF
+
+        chunkOffset = self.chunkOffset
+        char = self.chunk[chunkOffset]
+        self.chunkOffset = chunkOffset + 1
+
+        return char
+
+    def readChunk(self, chunkSize=None):
+        if chunkSize is None:
+            chunkSize = self._defaultChunkSize
+
+        self.prevNumLines, self.prevNumCols = self._position(self.chunkSize)
+
+        self.chunk = u""
+        self.chunkSize = 0
+        self.chunkOffset = 0
+
+        data = self.dataStream.read(chunkSize)
+        
+        #Deal with CR LF and surrogates broken across chunks
+        if self._bufferedCharacter:
+            data = self._bufferedCharacter + data
+            self._bufferedCharacter = None
+        elif not data:
+            # We have no more data, bye-bye stream
+            return False
+        
+        if len(data) > 1:
+            lastv = ord(data[-1])
+            if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF:
+                self._bufferedCharacter = data[-1]
+                data = data[:-1]
+        
+        self.reportCharacterErrors(data)
+        
+        # Replace invalid characters
+        # Note U+0000 is dealt with in the tokenizer
+        data = self.replaceCharactersRegexp.sub(u"\ufffd", data)
+                    
+        data = data.replace(u"\r\n", u"\n")
+        data = data.replace(u"\r", u"\n")
+
+        self.chunk = data
+        self.chunkSize = len(data)
+
+        return True
+
+    def characterErrorsUCS4(self, data):
+        # One "invalid-codepoint" error per match of invalid_unicode_re
+        self.errors.extend(["invalid-codepoint"] * len(invalid_unicode_re.findall(data)))
+
+    def characterErrorsUCS2(self, data):
+        #Variant for narrow (UCS2) builds, where a non-BMP character shows up
+        #as a surrogate pair and so has to be examined two code units at a time
+        skip = False
+        for match in invalid_unicode_re.finditer(data):
+            if skip:
+                skip = False # skip only the second half of the pair just handled
+                continue
+            codepoint = ord(match.group())
+            pos = match.start()
+            #Pretty sure there should be endianness issues here
+            if utils.isSurrogatePair(data[pos:pos+2]):
+                #We have a surrogate pair!
+                char_val = utils.surrogatePairToCodepoint(data[pos:pos+2])
+                if char_val in non_bmp_invalid_codepoints:
+                    self.errors.append("invalid-codepoint")
+                skip = True
+            elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and
+                  pos == len(data) - 1):
+                self.errors.append("invalid-codepoint")
+            else:
+                skip = False
+                self.errors.append("invalid-codepoint")
+
+    def charsUntil(self, characters, opposite = False):
+        """ Returns a string of characters from the stream up to but not
+        including any character in 'characters' or EOF. 'characters' must be
+        a container that supports the 'in' method and iteration over its
+        characters.
+        """
+
+        # Use a cache of regexps to find the required characters
+        try:
+            chars = charsUntilRegEx[(characters, opposite)]
+        except KeyError:
+            if __debug__:
+                for c in characters: 
+                    assert(ord(c) < 128)
+            regex = u"".join([u"\\x%02x" % ord(c) for c in characters])
+            if not opposite:
+                regex = u"^%s" % regex
+            chars = charsUntilRegEx[(characters, opposite)] = re.compile(u"[%s]+" % regex)
+
+        rv = []
+
+        while True:
+            # Find the longest matching prefix
+            m = chars.match(self.chunk, self.chunkOffset)
+            if m is None:
+                # If nothing matched, and it wasn't because we ran out of chunk,
+                # then stop
+                if self.chunkOffset != self.chunkSize:
+                    break
+            else:
+                end = m.end()
+                # If not the whole chunk matched, return everything
+                # up to the part that didn't match
+                if end != self.chunkSize:
+                    rv.append(self.chunk[self.chunkOffset:end])
+                    self.chunkOffset = end
+                    break
+            # If the whole remainder of the chunk matched,
+            # use it all and read the next chunk
+            rv.append(self.chunk[self.chunkOffset:])
+            if not self.readChunk():
+                # Reached EOF
+                break
+
+        r = u"".join(rv)
+        return r
+
+    def unget(self, char):
+        # Only one character is allowed to be ungotten at once - it must
+        # be consumed again before any further call to unget
+        if char is not None:
+            if self.chunkOffset == 0:
+                # unget is called quite rarely, so it's a good idea to do
+                # more work here if it saves a bit of work in the frequently
+                # called char and charsUntil.
+                # So, just prepend the ungotten character onto the current
+                # chunk:
+                self.chunk = char + self.chunk
+                self.chunkSize += 1
+            else:
+                self.chunkOffset -= 1
+                assert self.chunk[self.chunkOffset] == char
+
+class EncodingBytes(str):
+    """String-like object with an associated position and various extra methods
+    If the position is ever greater than the string length then an exception is
+    raised"""
+    def __new__(self, value):
+        return str.__new__(self, value.lower())
+
+    def __init__(self, value):
+        self._position=-1
+    
+    def __iter__(self):
+        return self
+    
+    def next(self):
+        p = self._position = self._position + 1
+        if p >= len(self):
+            raise StopIteration
+        elif p < 0:
+            raise TypeError
+        return self[p]
+
+    def previous(self):
+        p = self._position
+        if p >= len(self):
+            raise StopIteration
+        elif p < 0:
+            raise TypeError
+        self._position = p = p - 1
+        return self[p]
+    
+    def setPosition(self, position):
+        if self._position >= len(self):
+            raise StopIteration
+        self._position = position
+    
+    def getPosition(self):
+        if self._position >= len(self):
+            raise StopIteration
+        if self._position >= 0:
+            return self._position
+        else:
+            return None
+    
+    position = property(getPosition, setPosition)
+
+    def getCurrentByte(self):
+        return self[self.position]
+    
+    currentByte = property(getCurrentByte)
+
+    def skip(self, chars=spaceCharactersBytes):
+        """Skip past a list of characters"""
+        p = self.position               # use property for the error-checking
+        while p < len(self):
+            c = self[p]
+            if c not in chars:
+                self._position = p
+                return c
+            p += 1
+        self._position = p
+        return None
+
+    def skipUntil(self, chars):
+        p = self.position
+        while p < len(self):
+            c = self[p]
+            if c in chars:
+                self._position = p
+                return c
+            p += 1
+        self._position = p
+        return None
+
+    def matchBytes(self, bytes):
+        """Look for a sequence of bytes at the start of a string. If the bytes 
+        are found return True and advance the position to the byte after the 
+        match. Otherwise return False and leave the position alone"""
+        p = self.position
+        data = self[p:p+len(bytes)]
+        rv = data.startswith(bytes)
+        if rv:
+            self.position += len(bytes)
+        return rv
+    
+    def jumpTo(self, bytes):
+        """Look for the next sequence of bytes matching a given sequence. If
+        a match is found advance the position to the last byte of the match"""
+        newPosition = self[self.position:].find(bytes)
+        if newPosition > -1:
+            # XXX: This is ugly, but I can't see a nicer way to fix this.
+            if self._position == -1:
+                self._position = 0
+            self._position += (newPosition + len(bytes)-1)
+            return True
+        else:
+            raise StopIteration
+
+class EncodingParser(object):
+    """Mini parser for detecting character encoding from meta elements"""
+
+    def __init__(self, data):
+        """string - the data to work on for encoding detection"""
+        self.data = EncodingBytes(data)
+        self.encoding = None
+
+    def getEncoding(self):
+        methodDispatch = (
+            ("<!--",self.handleComment),
+            ("<meta",self.handleMeta),
+            ("</",self.handlePossibleEndTag),
+            ("<!",self.handleOther),
+            ("<?",self.handleOther),
+            ("<",self.handlePossibleStartTag))
+        for byte in self.data:
+            keepParsing = True
+            for key, method in methodDispatch:
+                if self.data.matchBytes(key):
+                    try:
+                        keepParsing = method()    
+                        break
+                    except StopIteration:
+                        keepParsing=False
+                        break
+            if not keepParsing:
+                break
+        
+        return self.encoding
+
+    def handleComment(self):
+        """Skip over comments"""
+        return self.data.jumpTo("-->")
+
+    def handleMeta(self):
+        if self.data.currentByte not in spaceCharactersBytes:
+            #if we have <meta not followed by a space so just keep going
+            return True
+        #We have a valid meta element we want to search for attributes
+        while True:
+            #Try to find the next attribute after the current position
+            attr = self.getAttribute()
+            if attr is None:
+                return True
+            else:
+                if attr[0] == "charset":
+                    tentativeEncoding = attr[1]
+                    codec = codecName(tentativeEncoding)
+                    if codec is not None:
+                        self.encoding = codec
+                        return False
+                elif attr[0] == "content":
+                    contentParser = ContentAttrParser(EncodingBytes(attr[1]))
+                    tentativeEncoding = contentParser.parse()
+                    codec = codecName(tentativeEncoding)
+                    if codec is not None:
+                        self.encoding = codec
+                        return False
+
+    def handlePossibleStartTag(self):
+        return self.handlePossibleTag(False)
+
+    def handlePossibleEndTag(self):
+        self.data.next()
+        return self.handlePossibleTag(True)
+
+    def handlePossibleTag(self, endTag):
+        data = self.data
+        if data.currentByte not in asciiLettersBytes:
+            #If the next byte is not an ascii letter either ignore this
+            #fragment (possible start tag case) or treat it according to 
+            #handleOther
+            if endTag:
+                data.previous()
+                self.handleOther()
+            return True
+        
+        c = data.skipUntil(spacesAngleBrackets)
+        if c == "<":
+            #return to the first step in the overall "two step" algorithm
+            #reprocessing the < byte
+            data.previous()
+        else:
+            #Read all attributes
+            attr = self.getAttribute()
+            while attr is not None:
+                attr = self.getAttribute()
+        return True
+
+    def handleOther(self):
+        return self.data.jumpTo(">")
+
+    def getAttribute(self):
+        """Return a name,value pair for the next attribute in the stream, 
+        if one is found, or None. data.next() raises StopIteration at end of data."""
+        data = self.data
+        # Step 1 (skip chars)
+        c = data.skip(spaceCharactersBytes | frozenset("/"))
+        # Step 2
+        if c in (">", None):
+            return None
+        # Step 3
+        attrName = []
+        attrValue = []
+        #Step 4 attribute name
+        while True:
+            if c == "=" and attrName:   
+                break
+            elif c in spaceCharactersBytes:
+                #Step 6!
+                c = data.skip()
+                c = data.next()
+                break
+            elif c in ("/", ">"):
+                return "".join(attrName), ""
+            elif c in asciiUppercaseBytes:
+                attrName.append(c.lower())
+            elif c is None:
+                return None
+            else:
+                attrName.append(c)
+            #Step 5
+            c = data.next()
+        #Step 7
+        if c != "=":
+            data.previous()
+            return "".join(attrName), ""
+        #Step 8
+        data.next()
+        #Step 9
+        c = data.skip()
+        #Step 10
+        if c in ("'", '"'):
+            #10.1
+            quoteChar = c
+            while True:
+                #10.2
+                c = data.next()
+                #10.3
+                if c == quoteChar:
+                    data.next()
+                    return "".join(attrName), "".join(attrValue)
+                #10.4
+                elif c in asciiUppercaseBytes:
+                    attrValue.append(c.lower())
+                #10.5
+                else:
+                    attrValue.append(c)
+        elif c == ">":
+            return "".join(attrName), ""
+        elif c in asciiUppercaseBytes:
+            attrValue.append(c.lower())
+        elif c is None:
+            return None
+        else:
+            attrValue.append(c)
+        # Step 11
+        while True:
+            c = data.next()
+            if c in spacesAngleBrackets:
+                return "".join(attrName), "".join(attrValue)
+            elif c in asciiUppercaseBytes:
+                attrValue.append(c.lower())
+            elif c is None:
+                return None
+            else:
+                attrValue.append(c)
+
+
+class ContentAttrParser(object): # pulls the charset value out of a meta content attribute value
+    def __init__(self, data):
+        self.data = data
+    def parse(self):
+        try:
+            #Check if the attr name is charset 
+            #otherwise return (jumpTo raises StopIteration, caught at the bottom)
+            self.data.jumpTo("charset")
+            self.data.position += 1
+            self.data.skip()
+            if not self.data.currentByte == "=":
+                #No = sign after charset: give up and report no encoding
+                return None
+            self.data.position += 1
+            self.data.skip()
+            #Look for an encoding between matching quote marks
+            if self.data.currentByte in ('"', "'"):
+                quoteMark = self.data.currentByte
+                self.data.position += 1
+                oldPosition = self.data.position
+                if self.data.jumpTo(quoteMark): # returns True or raises StopIteration, never a false value
+                    return self.data[oldPosition:self.data.position]
+                else:
+                    return None
+            else:
+                #Unquoted value
+                oldPosition = self.data.position
+                try:
+                    self.data.skipUntil(spaceCharactersBytes)
+                    return self.data[oldPosition:self.data.position] # position getter raises StopIteration past the end
+                except StopIteration:
+                    #Return the whole remaining value
+                    return self.data[oldPosition:]
+        except StopIteration:
+            return None
+
+
+def codecName(encoding):
+    """Return the python codec name corresponding to an encoding or None if the
+    string doesn't correspond to a valid encoding."""
+    # isinstance (rather than an exact type match) also accepts str/unicode
+    # subclasses such as EncodingBytes; None and non-strings fall through
+    if isinstance(encoding, types.StringTypes):
+        return encodings.get(ascii_punctuation_re.sub("", encoding).lower(), None)
+    return None
diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py
new file mode 100644
index 00000000..ae4c7d83
--- /dev/null
+++ b/html5lib/sanitizer.py
@@ -0,0 +1,258 @@
+import re
+from xml.sax.saxutils import escape, unescape
+
+from tokenizer import HTMLTokenizer
+from constants import tokenTypes
+
+class HTMLSanitizerMixin(object):
+    """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
+
+    acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
+        'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
+        'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
+        'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
+        'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
+        'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
+        'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
+        'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
+        'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
+        'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
+        'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
+        'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
+        'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
+      
+    mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
+        'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
+        'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
+        'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
+        'munderover', 'none']
+      
+    svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
+        'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
+        'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
+        'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
+        'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
+        'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
+        
+    acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
+        'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
+        'background', 'balance', 'bgcolor', 'bgproperties', 'border',
+        'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
+        'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
+        'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
+        'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
+        'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
+        'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
+        'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
+        'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
+        'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
+        'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
+        'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
+        'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
+        'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg',
+        'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
+        'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
+        'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
+        'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
+        'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
+        'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
+        'width', 'wrap', 'xml:lang']
+
+    mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
+        'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
+        'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
+        'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
+        'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
+        'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
+        'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
+        'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
+        'xlink:type', 'xmlns', 'xmlns:xlink']
+  
+    svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
+        'arabic-form', 'ascent', 'attributeName', 'attributeType',
+        'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
+        'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
+        'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
+        'fill-opacity', 'fill-rule', 'font-family', 'font-size',
+        'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
+        'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
+        'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
+        'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
+        'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
+        'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
+        'opacity', 'orient', 'origin', 'overline-position',
+        'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
+        'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
+        'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
+        'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
+        'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
+        'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
+        'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
+        'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
+        'transform', 'type', 'u1', 'u2', 'underline-position',
+        'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
+        'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
+        'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
+        'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
+        'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
+        'y1', 'y2', 'zoomAndPan']
+
+    attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc',
+        'xlink:href', 'xml:base']
+
+    svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
+        'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
+        'mask', 'stroke']
+
+    svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
+        'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
+        'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
+        'set', 'use']
+  
+    acceptable_css_properties = ['azimuth', 'background-color',
+        'border-bottom-color', 'border-collapse', 'border-color',
+        'border-left-color', 'border-right-color', 'border-top-color', 'clear',
+        'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
+        'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
+        'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
+        'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
+        'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
+        'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
+        'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
+        'white-space', 'width']
+  
+    acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
+        'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
+        'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
+        'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
+        'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
+        'transparent', 'underline', 'white', 'yellow']
+  
+    acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule',
+        'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
+        'stroke-opacity']
+  
+    acceptable_protocols = [ 'ed2k', 'ftp', 'http', 'https', 'irc',
+        'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
+        'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
+        'ssh', 'sftp', 'rtsp', 'afs' ]
+  
+    # subclasses may define their own versions of these constants
+    allowed_elements = acceptable_elements + mathml_elements + svg_elements
+    allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
+    allowed_css_properties = acceptable_css_properties
+    allowed_css_keywords = acceptable_css_keywords
+    allowed_svg_properties = acceptable_svg_properties
+    allowed_protocols = acceptable_protocols
+
+    # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
+    # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style
+    # attributes are parsed, and a restricted set, specified by
+    # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, is allowed through.
+    # Attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
+    # in ALLOWED_PROTOCOLS are allowed.
+    #
+    #   sanitize_html('<script> do_nasty_stuff() </script>')
+    #    => &lt;script> do_nasty_stuff() &lt;/script>
+    #   sanitize_html('<a href="javascript: sucker();">Click here for $100</a>')
+    #    => <a>Click here for $100</a>
+    def sanitize_token(self, token):
+
+        # accommodate filters which use token_type differently
+        token_type = token["type"]
+        if token_type in tokenTypes.keys():
+          token_type = tokenTypes[token_type]
+
+        if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"], 
+                             tokenTypes["EmptyTag"]):
+            if token["name"] in self.allowed_elements:
+                if token.has_key("data"):
+                    attrs = dict([(name,val) for name,val in
+                                  token["data"][::-1] 
+                                  if name in self.allowed_attributes])
+                    for attr in self.attr_val_is_uri:
+                        if not attrs.has_key(attr):
+                            continue
+                        val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
+                                               unescape(attrs[attr])).lower()
+                        #remove replacement characters from unescaped characters
+                        val_unescaped = val_unescaped.replace(u"\ufffd", "")
+                        if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
+                            (val_unescaped.split(':')[0] not in 
+                             self.allowed_protocols)):
+                            del attrs[attr]
+                    for attr in self.svg_attr_val_allows_ref:
+                        if attr in attrs:
+                            attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
+                                                 ' ',
+                                                 unescape(attrs[attr]))
+                    if (token["name"] in self.svg_allow_local_href and
+                        'xlink:href' in attrs and re.search('^\s*[^#\s].*',
+                                                            attrs['xlink:href'])):
+                        del attrs['xlink:href']
+                    if attrs.has_key('style'):
+                        attrs['style'] = self.sanitize_css(attrs['style'])
+                    token["data"] = [[name,val] for name,val in attrs.items()]
+                return token
+            else:
+                if token_type == tokenTypes["EndTag"]:
+                    token["data"] = "</%s>" % token["name"]
+                elif token["data"]:
+                    attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]])
+                    token["data"] = "<%s%s>" % (token["name"],attrs)
+                else:
+                    token["data"] = "<%s>" % token["name"]
+                if token.get("selfClosing"):
+                    token["data"]=token["data"][:-1] + "/>"
+
+                if token["type"] in tokenTypes.keys():
+                    token["type"] = "Characters"
+                else:
+                    token["type"] = tokenTypes["Characters"]
+
+                del token["name"]
+                return token
+        elif token_type == tokenTypes["Comment"]:
+            pass
+        else:
+            return token
+
+    def sanitize_css(self, style):
+        """Return `style` cut down to whitelisted declarations ('' if malformed)."""
+        # disallow urls
+        style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style)
+        # gauntlet
+        if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return ''
+        if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return ''
+
+        clean = []
+        for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style):
+          if not value: continue
+          if prop.lower() in self.allowed_css_properties:
+              clean.append(prop + ': ' + value + ';')
+          elif prop.split('-')[0].lower() in ['background','border','margin',
+                                              'padding']:
+              # every word must be an allowed keyword (subclass-overridable) or fit the pattern
+              for keyword in value.split():
+                  if not keyword in self.allowed_css_keywords and \
+                      not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$",keyword):
+                      break
+              else:
+                  clean.append(prop + ': ' + value + ';')
+          elif prop.lower() in self.allowed_svg_properties:
+              clean.append(prop + ': ' + value + ';')
+        return ' '.join(clean)
+
+class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
+    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
+                 lowercaseElementName=False, lowercaseAttrName=False, parser=None):
+        #Change case matching defaults as we only output lowercase html anyway
+        #This solution doesn't seem ideal...
+        HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
+                               lowercaseElementName, lowercaseAttrName, parser=parser)
+
+    def __iter__(self):
+        for token in HTMLTokenizer.__iter__(self):
+            token = self.sanitize_token(token)
+            if token:
+                yield token
diff --git a/html5lib/serializer/__init__.py b/html5lib/serializer/__init__.py
new file mode 100644
index 00000000..1b746655
--- /dev/null
+++ b/html5lib/serializer/__init__.py
@@ -0,0 +1,17 @@
+
+from html5lib import treewalkers
+
+from htmlserializer import HTMLSerializer
+from xhtmlserializer import XHTMLSerializer
+
+def serialize(input, tree="simpletree", format="html", encoding=None, **serializer_opts):
+    """Render `input` via the `tree` treewalker as "html" or "xhtml" (else ValueError)."""
+    # XXX: Should we cache this?
+    walker = treewalkers.getTreeWalker(tree)
+    if format == "html":
+        s = HTMLSerializer(**serializer_opts)
+    elif format == "xhtml":
+        s = XHTMLSerializer(**serializer_opts)
+    else:
+        raise ValueError("format must be either html or xhtml")
+    return s.render(walker(input), encoding)
diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py
new file mode 100644
index 00000000..8dd0a815
--- /dev/null
+++ b/html5lib/serializer/htmlserializer.py
@@ -0,0 +1,312 @@
+try:
+    frozenset
+except NameError:
+    # Import from the sets module for python 2.3
+    from sets import ImmutableSet as frozenset
+
+import gettext
+_ = gettext.gettext
+
+from html5lib.constants import voidElements, booleanAttributes, spaceCharacters
+from html5lib.constants import rcdataElements, entities, xmlEntities
+from html5lib import utils
+from xml.sax.saxutils import escape
+
+spaceCharacters = u"".join(spaceCharacters)
+
+try:
+    from codecs import register_error, xmlcharrefreplace_errors
+except ImportError:
+    unicode_encode_errors = "strict"
+else:
+    unicode_encode_errors = "htmlentityreplace"
+
+    from html5lib.constants import entities
+
+    encode_entity_map = {}
+    is_ucs4 = len(u"\U0010FFFF") == 1
+    for k, v in entities.items():
+        #skip multi-character entities
+        if ((is_ucs4 and len(v) > 1) or
+            (not is_ucs4 and len(v) > 2)):
+            continue
+        if v != "&":
+            if len(v) == 2:
+                v = utils.surrogatePairToCodepoint(v)
+            else:
+                try:
+                    v = ord(v)
+                except:
+                    print v
+                    raise
+            if not v in encode_entity_map or k.islower():
+                # prefer &lt; over &LT; and similarly for &amp;, &gt;, etc.
+                encode_entity_map[v] = k
+
+    def htmlentityreplace_errors(exc):
+        if isinstance(exc, (UnicodeEncodeError, UnicodeTranslateError)):
+            res = []
+            codepoints = []
+            skip = False
+            for i, c in enumerate(exc.object[exc.start:exc.end]):
+                if skip:
+                    skip = False
+                    continue
+                index = i + exc.start
+                if utils.isSurrogatePair(exc.object[index:min([exc.end, index+2])]):
+                    codepoint = utils.surrogatePairToCodepoint(exc.object[index:index+2])
+                    skip = True
+                else:
+                    codepoint = ord(c)
+                codepoints.append(codepoint)
+            for cp in codepoints:
+                e = encode_entity_map.get(cp)
+                if e:
+                    res.append("&")
+                    res.append(e)
+                    if not e.endswith(";"):
+                        res.append(";")
+                else:
+                    res.append("&#x%s;"%(hex(cp)[2:]))
+            return (u"".join(res), exc.end)
+        else:
+            return xmlcharrefreplace_errors(exc)
+
+    register_error(unicode_encode_errors, htmlentityreplace_errors)
+
+    del register_error
+
+
+class HTMLSerializer(object):
+
+    # attribute quoting options
+    quote_attr_values = False
+    quote_char = u'"'
+    use_best_quote_char = True
+
+    # tag syntax options
+    omit_optional_tags = True
+    minimize_boolean_attributes = True
+    use_trailing_solidus = False
+    space_before_trailing_solidus = True
+
+    # escaping options
+    escape_lt_in_attrs = False
+    escape_rcdata = False
+    resolve_entities = True
+
+    # miscellaneous options
+    inject_meta_charset = True
+    strip_whitespace = False
+    sanitize = False
+
+    options = ("quote_attr_values", "quote_char", "use_best_quote_char",
+          "minimize_boolean_attributes", "use_trailing_solidus",
+          "space_before_trailing_solidus", "omit_optional_tags",
+          "strip_whitespace", "inject_meta_charset", "escape_lt_in_attrs",
+          "escape_rcdata", "resolve_entities", "sanitize")
+
+    def __init__(self, **kwargs):
+        """Initialize HTMLSerializer.
+
+        Keyword options (default given first unless specified) include:
+
+        inject_meta_charset=True|False
+          Whether to insert a meta element to define the character set of the
+          document.
+        quote_attr_values=True|False
+          Whether to quote attribute values that don't require quoting
+          per HTML5 parsing rules.
+        quote_char=u'"'|u"'"
+          Use given quote character for attribute quoting. Default is to
+          use double quote unless attribute value contains a double quote,
+          in which case single quotes are used instead.
+        escape_lt_in_attrs=False|True
+          Whether to escape < in attribute values.
+        escape_rcdata=False|True
+          Whether to escape characters that need to be escaped within normal
+          elements within rcdata elements such as style.
+        resolve_entities=True|False
+          Whether to resolve named character entities that appear in the
+          source tree. The XML predefined entities &lt; &gt; &amp; &quot; &apos;
+          are unaffected by this setting.
+        strip_whitespace=False|True
+          Whether to remove semantically meaningless whitespace. (This
+          compresses all whitespace to a single space except within pre.)
+        minimize_boolean_attributes=True|False
+          Shortens boolean attributes to give just the attribute value,
+          for example <input disabled="disabled"> becomes <input disabled>.
+        use_trailing_solidus=False|True
+          Includes a close-tag slash at the end of the start tag of void
+          elements (empty elements whose end tag is forbidden). E.g. <hr/>.
+        space_before_trailing_solidus=True|False
+          Places a space immediately before the closing slash in a tag
+          using a trailing solidus. E.g. <hr />. Requires use_trailing_solidus.
+        sanitize=False|True
+          Strip all unsafe or unknown constructs from output.
+          See `html5lib user documentation`_
+        omit_optional_tags=True|False
+          Omit start/end tags that are optional.
+
+        .. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
+        """
+        if kwargs.has_key('quote_char'):
+            self.use_best_quote_char = False
+        for attr in self.options:
+            setattr(self, attr, kwargs.get(attr, getattr(self, attr)))
+        self.errors = []
+        self.strict = False
+
+    def encode(self, string):
+        assert(isinstance(string, unicode))
+        if self.encoding:
+            return string.encode(self.encoding, unicode_encode_errors)
+        else:
+            return string
+
+    def encodeStrict(self, string):
+        assert(isinstance(string, unicode))
+        if self.encoding:
+            return string.encode(self.encoding, "strict")
+        else:
+            return string
+
+    def serialize(self, treewalker, encoding=None):
+        self.encoding = encoding
+        in_cdata = False
+        self.errors = []
+        if encoding and self.inject_meta_charset:
+            from html5lib.filters.inject_meta_charset import Filter
+            treewalker = Filter(treewalker, encoding)
+        # XXX: WhitespaceFilter should be used before OptionalTagFilter
+        # for maximum efficiency of the latter filter
+        if self.strip_whitespace:
+            from html5lib.filters.whitespace import Filter
+            treewalker = Filter(treewalker)
+        if self.sanitize:
+            from html5lib.filters.sanitizer import Filter
+            treewalker = Filter(treewalker)
+        if self.omit_optional_tags:
+            from html5lib.filters.optionaltags import Filter
+            treewalker = Filter(treewalker)
+        for token in treewalker:
+            type = token["type"]
+            if type == "Doctype":
+                doctype = u"<!DOCTYPE %s" % token["name"]
+                
+                if token["publicId"]:
+                    doctype += u' PUBLIC "%s"' % token["publicId"]
+                elif token["systemId"]:
+                    doctype += u" SYSTEM"
+                if token["systemId"]:                
+                    if token["systemId"].find(u'"') >= 0:
+                        if token["systemId"].find(u"'") >= 0:
+                            self.serializeError(_("System identifer contains both single and double quote characters"))
+                        quote_char = u"'"
+                    else:
+                        quote_char = u'"'
+                    doctype += u" %s%s%s" % (quote_char, token["systemId"], quote_char)
+                
+                doctype += u">"
+                yield self.encodeStrict(doctype)
+
+            elif type in ("Characters", "SpaceCharacters"):
+                if type == "SpaceCharacters" or in_cdata:
+                    if in_cdata and token["data"].find("</") >= 0:
+                        self.serializeError(_("Unexpected </ in CDATA"))
+                    yield self.encode(token["data"])
+                else:
+                    yield self.encode(escape(token["data"]))
+
+            elif type in ("StartTag", "EmptyTag"):
+                name = token["name"]
+                yield self.encodeStrict(u"<%s" % name)
+                if name in rcdataElements and not self.escape_rcdata:
+                    in_cdata = True
+                elif in_cdata:
+                    self.serializeError(_("Unexpected child element of a CDATA element"))
+                attributes = []
+                for (attr_namespace,attr_name),attr_value in sorted(token["data"].items()):
+                    #TODO: Add namespace support here
+                    k = attr_name
+                    v = attr_value
+                    yield self.encodeStrict(u' ')
+
+                    yield self.encodeStrict(k)
+                    if not self.minimize_boolean_attributes or \
+                      (k not in booleanAttributes.get(name, tuple()) \
+                      and k not in booleanAttributes.get("", tuple())):
+                        yield self.encodeStrict(u"=")
+                        if self.quote_attr_values or not v:
+                            quote_attr = True
+                        else:
+                            quote_attr = reduce(lambda x,y: x or (y in v),
+                                spaceCharacters + u">\"'=", False)
+                        v = v.replace(u"&", u"&amp;")
+                        if self.escape_lt_in_attrs: v = v.replace(u"<", u"&lt;")
+                        if quote_attr:
+                            quote_char = self.quote_char
+                            if self.use_best_quote_char:
+                                if u"'" in v and u'"' not in v:
+                                    quote_char = u'"'
+                                elif u'"' in v and u"'" not in v:
+                                    quote_char = u"'"
+                            if quote_char == u"'":
+                                v = v.replace(u"'", u"&#39;")
+                            else:
+                                v = v.replace(u'"', u"&quot;")
+                            yield self.encodeStrict(quote_char)
+                            yield self.encode(v)
+                            yield self.encodeStrict(quote_char)
+                        else:
+                            yield self.encode(v)
+                if name in voidElements and self.use_trailing_solidus:
+                    if self.space_before_trailing_solidus:
+                        yield self.encodeStrict(u" /")
+                    else:
+                        yield self.encodeStrict(u"/")
+                yield self.encode(u">")
+
+            elif type == "EndTag":
+                name = token["name"]
+                if name in rcdataElements:
+                    in_cdata = False
+                elif in_cdata:
+                    self.serializeError(_("Unexpected child element of a CDATA element"))
+                yield self.encodeStrict(u"</%s>" % name)
+
+            elif type == "Comment":
+                data = token["data"]
+                if data.find("--") >= 0:
+                    self.serializeError(_("Comment contains --"))
+                yield self.encodeStrict(u"<!--%s-->" % token["data"])
+
+            elif type == "Entity":
+                name = token["name"]
+                key = name + ";"
+                if not key in entities:
+                    self.serializeError(_("Entity %s not recognized" % name))
+                if self.resolve_entities and key not in xmlEntities:
+                    data = entities[key]
+                else:
+                    data = u"&%s;" % name
+                yield self.encodeStrict(data)
+
+            else:
+                self.serializeError(token["data"])
+
+    def render(self, treewalker, encoding=None):
+        if encoding:
+            return "".join(list(self.serialize(treewalker, encoding)))
+        else:
+            return u"".join(list(self.serialize(treewalker)))
+
+    def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
+        # XXX The idea is to make data mandatory.
+        self.errors.append(data)
+        if self.strict:
+            raise SerializeError
+
+class SerializeError(Exception):
+    """Error in serialized tree; raised by HTMLSerializer.serializeError when the
+    serializer's `strict` flag is set."""
diff --git a/html5lib/serializer/xhtmlserializer.py b/html5lib/serializer/xhtmlserializer.py
new file mode 100644
index 00000000..7fdce47b
--- /dev/null
+++ b/html5lib/serializer/xhtmlserializer.py
@@ -0,0 +1,9 @@
+from htmlserializer import HTMLSerializer
+
+class XHTMLSerializer(HTMLSerializer):
+    quote_attr_values = True
+    minimize_boolean_attributes = False
+    use_trailing_solidus = True
+    escape_lt_in_attrs = True
+    omit_optional_tags = False
+    escape_rcdata = True
diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py
new file mode 100644
index 00000000..a7e9c4ea
--- /dev/null
+++ b/html5lib/tests/__init__.py
@@ -0,0 +1,12 @@
+import sys
+import os
+
+parent_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], ".."))
+
+if not parent_path in sys.path:
+    sys.path.insert(0, parent_path)
+del parent_path
+
+from runtests import buildTestSuite
+
+import support
diff --git a/html5lib/tests/mockParser.py b/html5lib/tests/mockParser.py
new file mode 100644
index 00000000..5f9092b2
--- /dev/null
+++ b/html5lib/tests/mockParser.py
@@ -0,0 +1,37 @@
+import sys
+import os
+
+if __name__ == '__main__':
+    #Allow us to import from the src directory
+    os.chdir(os.path.split(os.path.abspath(__file__))[0])
+    sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
+
+from tokenizer import HTMLTokenizer
+
+class HTMLParser(object):
+    """ Fake parser to test tokenizer output """
+    def parse(self, stream, output=True):
+        tokenizer = HTMLTokenizer(stream)
+        for token in tokenizer:
+            if output:
+                print token
+
+if __name__ == "__main__":
+    x = HTMLParser()
+    if len(sys.argv) > 1:
+        if len(sys.argv) > 2:
+            import hotshot, hotshot.stats
+            prof = hotshot.Profile('stats.prof')
+            prof.runcall(x.parse, sys.argv[1], False)
+            prof.close()
+            stats = hotshot.stats.load('stats.prof')
+            stats.strip_dirs()
+            stats.sort_stats('time')
+            stats.print_stats()
+        else:
+            x.parse(sys.argv[1])
+    else:
+        print """Usage: python mockParser.py filename [stats]
+        If stats is specified the hotshots profiler will run and output the
+        stats instead.
+        """
diff --git a/html5lib/tests/runparsertests.py b/html5lib/tests/runparsertests.py
new file mode 100644
index 00000000..e671f8d7
--- /dev/null
+++ b/html5lib/tests/runparsertests.py
@@ -0,0 +1,27 @@
+import sys
+import os
+import glob
+import unittest
+
+#Allow us to import the parent module
+os.chdir(os.path.split(os.path.abspath(__file__))[0])
+sys.path.insert(0, os.path.abspath(os.curdir))
+sys.path.insert(0, os.path.abspath(os.pardir))
+sys.path.insert(0, os.path.join(os.path.abspath(os.pardir), "src"))
+
+def buildTestSuite():
+    suite = unittest.TestSuite()
+    for testcase in glob.glob('test_*.py'):
+        if testcase in ("test_tokenizer.py", "test_parser.py", "test_parser2.py"):
+            module = os.path.splitext(testcase)[0]
+            suite.addTest(__import__(module).buildTestSuite())
+    return suite
+
+def main():
+    results = unittest.TextTestRunner().run(buildTestSuite())
+    return results
+
+if __name__ == "__main__":
+    results = main()
+    if not results.wasSuccessful():
+        sys.exit(1)
diff --git a/html5lib/tests/runtests.py b/html5lib/tests/runtests.py
new file mode 100644
index 00000000..b8e35722
--- /dev/null
+++ b/html5lib/tests/runtests.py
@@ -0,0 +1,20 @@
+import sys
+import os
+import glob
+import unittest
+
+def buildTestSuite():
+    suite = unittest.TestSuite()
+    for testcase in glob.glob('test_*.py'):
+        module = os.path.splitext(testcase)[0]
+        suite.addTest(__import__(module).buildTestSuite())
+    return suite
+
+def main():
+    results = unittest.TextTestRunner().run(buildTestSuite())
+    return results
+
+if __name__ == "__main__":
+    results = main()
+    if not results.wasSuccessful():
+        sys.exit(1)
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
new file mode 100644
index 00000000..deaf2e25
--- /dev/null
+++ b/html5lib/tests/support.py
@@ -0,0 +1,127 @@
+import os
+import sys
+import codecs
+import glob
+
+base_path = os.path.split(__file__)[0]
+
+if os.path.exists(os.path.join(base_path, 'testdata')):
+    #release
+    test_dir = os.path.join(base_path, 'testdata')
+else:
+    #development
+    test_dir = os.path.abspath(
+        os.path.join(base_path,
+                     os.path.pardir, os.path.pardir,
+                     os.path.pardir, 'testdata'))
+    assert os.path.exists(test_dir), "Test data not found"
+    #import the development html5lib
+    sys.path.insert(0, os.path.abspath(os.path.join(base_path, 
+                                                    os.path.pardir,
+                                                    os.path.pardir)))
+
+import html5lib
+from html5lib import html5parser, treebuilders
+del base_path
+
+#Build a dict of available trees
+treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
+             "DOM":treebuilders.getTreeBuilder("dom")}
+
+#Try whatever etree implementations are available from a list that are
+#"supposed" to work
+try:
+    import xml.etree.ElementTree as ElementTree
+    treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
+except ImportError:
+    try:
+        import elementtree.ElementTree as ElementTree
+        treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
+    except ImportError:
+        pass
+
+try:
+    import xml.etree.cElementTree as cElementTree
+    treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
+except ImportError:
+    try:
+        import cElementTree
+        treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
+    except ImportError:
+        pass
+    
+try:
+    import lxml.etree as lxml
+    treeTypes['lxml'] = treebuilders.getTreeBuilder("etree", lxml, fullTree=True)
+except ImportError:
+    pass
+
+try:
+    import BeautifulSoup
+    treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
+except ImportError:
+    pass
+
+def html5lib_test_files(subdirectory, files='*.dat'):
+    return glob.glob(os.path.join(test_dir,subdirectory,files))
+
+class DefaultDict(dict):
+    def __init__(self, default, *args, **kwargs):
+        self.default = default
+        dict.__init__(self, *args, **kwargs)
+    
+    def __getitem__(self, key):
+        return dict.get(self, key, self.default)
+
+class TestData(object):
+    def __init__(self, filename, newTestHeading="data"):
+        self.f = codecs.open(filename, encoding="utf8")
+        self.newTestHeading = newTestHeading
+    
+    def __iter__(self):
+        data = DefaultDict(None)
+        key=None
+        for line in self.f:
+            heading = self.isSectionHeading(line)
+            if heading:
+                if data and heading == self.newTestHeading:
+                    #Remove trailing newline
+                    data[key] = data[key][:-1]
+                    yield self.normaliseOutput(data)
+                    data = DefaultDict(None)
+                key = heading
+                data[key]=""
+            elif key is not None:
+                data[key] += line
+        if data:
+            yield self.normaliseOutput(data)
+        
+    def isSectionHeading(self, line):
+        """If line is a section heading (it starts with "#") return the
+        heading text with the "#" and surrounding whitespace removed, else False"""
+        if line.startswith("#"):
+            return line[1:].strip()
+        else:
+            return False
+    
+    def normaliseOutput(self, data):
+        #Remove trailing newlines
+        for key,value in data.iteritems():
+            if value.endswith("\n"):
+                data[key] = value[:-1]
+        return data
+
+def convert(stripChars):
+    def convertData(data):
+        """convert the output of str(document) to the format used in the testcases"""
+        data = data.split("\n")
+        rv = []
+        for line in data:
+            if line.startswith("|"):
+                rv.append(line[stripChars:])
+            else:
+                rv.append(line)
+        return "\n".join(rv)
+    return convertData
+
+convertExpected = convert(2)
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
new file mode 100644
index 00000000..c8c63e84
--- /dev/null
+++ b/html5lib/tests/test_encoding.py
@@ -0,0 +1,54 @@
+import os
+import unittest
+from support import html5lib_test_files, TestData, test_dir
+
+from html5lib import HTMLParser, inputstream
+
+import re, unittest
+
+class Html5EncodingTestCase(unittest.TestCase):
+    def test_codec_name(self):
+        self.assertEquals(inputstream.codecName("utf-8"), "utf-8")
+        self.assertEquals(inputstream.codecName("utf8"), "utf-8")
+        self.assertEquals(inputstream.codecName("  utf8  "), "utf-8")
+        self.assertEquals(inputstream.codecName("ISO_8859--1"), "windows-1252")
+
+def buildTestSuite():
+    for filename in html5lib_test_files("encoding"):
+        test_name = os.path.basename(filename).replace('.dat',''). \
+            replace('-','')
+        tests = TestData(filename, "data")
+        for idx, test in enumerate(tests):
+            def encodingTest(self, data=test['data'], 
+                             encoding=test['encoding']):
+                p = HTMLParser()
+                t = p.parse(data, useChardet=False)
+                
+                errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n"%
+                                (data, repr(encoding.lower()), 
+                                 repr(p.tokenizer.stream.charEncoding)))
+                self.assertEquals(encoding.lower(),
+                                  p.tokenizer.stream.charEncoding[0], 
+                                  errorMessage)
+            setattr(Html5EncodingTestCase, 'test_%s_%d' % (test_name, idx+1),
+                encodingTest)
+
+    try:
+        import chardet
+        def test_chardet(self):
+            data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt")).read()
+            encoding = inputstream.HTMLInputStream(data).charEncoding
+            assert encoding[0].lower() == "big5"
+        setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)
+    except ImportError:
+        print "chardet not found, skipping chardet tests"
+        
+
+    return unittest.defaultTestLoader.loadTestsFromName(__name__)
+
+def main():
+    buildTestSuite()
+    unittest.main()
+
+if __name__ == "__main__":
+    main()
diff --git a/html5lib/tests/test_formfiller.py b/html5lib/tests/test_formfiller.py
new file mode 100644
index 00000000..debc11b7
--- /dev/null
+++ b/html5lib/tests/test_formfiller.py
@@ -0,0 +1,296 @@
+import sys
+import unittest
+
+from html5lib.filters.formfiller import SimpleFilter
+
+class FieldStorage(dict):
+    def getlist(self, name):
+        l = self[name]
+        if isinstance(l, list):
+            return l
+        elif isinstance(l, tuple) or hasattr(l, '__iter__'):
+            return list(l)
+        return [l]
+
+class TestCase(unittest.TestCase):
+    def runTest(self, input, formdata, expected):
+        try:
+            output = list(SimpleFilter(input, formdata))
+        except NotImplementedError, nie:
+            # Amnesty for those that confess...
+            print >>sys.stderr, "Not implemented:", str(nie)
+        else:
+            errorMsg = "\n".join(["\n\nInput:", str(input),
+                                  "\nForm data:", str(formdata),
+                                  "\nExpected:", str(expected),
+                                  "\nReceived:", str(output)])
+            self.assertEquals(output, expected, errorMsg)
+
+    def testSingleTextInputWithValue(self):
+        self.runTest(
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"quux")]}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"bar")]}])
+
+    def testSingleTextInputWithoutValue(self):
+        self.runTest(
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"text"), (u"name", u"foo")]}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"bar")]}])
+
+    def testSingleCheckbox(self):
+        self.runTest(
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar")]}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}])
+
+    def testSingleCheckboxShouldBeUnchecked(self):
+        self.runTest(
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}])
+
+    def testSingleCheckboxCheckedByDefault(self):
+        self.runTest(
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}])
+
+    def testSingleCheckboxCheckedByDefaultShouldBeUnchecked(self):
+        self.runTest(
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux"), (u"checked", u"")]}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"EmptyTag", "name": u"input",
+                "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}])
+
+    def testSingleTextareaWithValue(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"textarea", "data": []}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
+             {"type": u"Characters", "data": u"bar"},
+             {"type": u"EndTag", "name": u"textarea", "data": []}])
+
+    def testSingleTextareaWithoutValue(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
+             {"type": u"EndTag", "name": u"textarea", "data": []}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
+             {"type": u"Characters", "data": u"bar"},
+             {"type": u"EndTag", "name": u"textarea", "data": []}])
+
+    def testSingleSelectWithValue(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}])
+
+    def testSingleSelectWithValueShouldBeUnselected(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}],
+            FieldStorage({"foo": "quux"}),
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}])
+
+    def testSingleSelectWithoutValue(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": []},
+             {"type": u"Characters", "data": u"bar"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"selected", u"")]},
+             {"type": u"Characters", "data": u"bar"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}])
+
+    def testSingleSelectWithoutValueShouldBeUnselected(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": []},
+             {"type": u"Characters", "data": u"bar"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}],
+            FieldStorage({"foo": "quux"}),
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": []},
+             {"type": u"Characters", "data": u"bar"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}])
+
+    def testSingleSelectTwoOptionsWithValue(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}])
+
+    def testSingleSelectTwoOptionsWithValueShouldBeUnselected(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"baz")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}],
+            FieldStorage({"foo": "quux"}),
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"baz")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}])
+
+    def testSingleSelectTwoOptionsWithoutValue(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": []},
+             {"type": u"Characters", "data": u"bar"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": []},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}],
+            FieldStorage({"foo": "bar"}),
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"selected", u"")]},
+             {"type": u"Characters", "data": u"bar"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": []},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}])
+
+    def testSingleSelectTwoOptionsWithoutValueShouldBeUnselected(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": []},
+             {"type": u"Characters", "data": u"bar"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": []},
+             {"type": u"Characters", "data": u"baz"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}],
+            FieldStorage({"foo": "quux"}),
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": []},
+             {"type": u"Characters", "data": u"bar"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": []},
+             {"type": u"Characters", "data": u"baz"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}])
+
+    def testSingleSelectMultiple(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo"), (u"multiple", u"")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}],
+            FieldStorage({"foo": ["bar", "quux"]}),
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo"), (u"multiple", u"")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux"), (u"selected", u"")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}])
+
+    def testTwoSelect(self):
+        self.runTest(
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []},
+             {"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}],
+            FieldStorage({"foo": ["bar", "quux"]}),
+            [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []},
+             {"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux"), (u"selected", u"")]},
+             {"type": u"Characters", "data": u"quux"},
+             {"type": u"EndTag", "name": u"option", "data": []},
+             {"type": u"EndTag", "name": u"select", "data": []}])
+
+def buildTestSuite():
+    return unittest.defaultTestLoader.loadTestsFromName(__name__)
+
+def main():
+    buildTestSuite()
+    unittest.main()
+
+if __name__ == "__main__":
+    main()
diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py
new file mode 100644
index 00000000..c6704faa
--- /dev/null
+++ b/html5lib/tests/test_parser.py
@@ -0,0 +1,140 @@
+import os
+import sys
+import traceback
+import StringIO
+import warnings
+import re
+
+warnings.simplefilter("error")
+
+from support import html5lib_test_files as data_files
+from support import TestData, convert, convertExpected
+import html5lib
+from html5lib import html5parser, treebuilders, constants
+
+treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
+             "DOM":treebuilders.getTreeBuilder("dom")}
+
+#Try whatever etree implementations are available from a list that are
+#"supposed" to work
+try:
+    import xml.etree.ElementTree as ElementTree
+    treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
+except ImportError:
+    try:
+        import elementtree.ElementTree as ElementTree
+        treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
+    except ImportError:
+        pass
+
+try:
+    import xml.etree.cElementTree as cElementTree
+    treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
+except ImportError:
+    try:
+        import cElementTree
+        treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
+    except ImportError:
+        pass
+    
+try:
+    try:
+        import lxml.html as lxml
+    except ImportError:
+        import lxml.etree as lxml
+    treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml", lxml, fullTree=True)
+except ImportError:
+    pass
+
+try:
+    import BeautifulSoup
+    treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
+except ImportError:
+    pass
+
+#Try whatever dom implementations are available from a list that are
+#"supposed" to work
+try:
+    import pxdom
+    treeTypes["pxdom"] = treebuilders.getTreeBuilder("dom", pxdom)
+except ImportError:
+    pass
+
+#Run the parse error checks
+checkParseErrors = False
+
+#XXX - There should just be one function here but for some reason the testcase
+#format differs from the treedump format by a single space character
+def convertTreeDump(data):
+    return "\n".join(convert(3)(data).split("\n")[1:])
+
+namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub
+
+
+def runParserTest(innerHTML, input, expected, errors, treeClass,
+                  namespaceHTMLElements):
+    #XXX - move this out into the setup function
+    #concatenate all consecutive character tokens into a single token
+    try:
+        p = html5parser.HTMLParser(tree = treeClass,
+                                   namespaceHTMLElements=namespaceHTMLElements)
+    except constants.DataLossWarning:
+        return
+
+    try:
+        if innerHTML:
+            document = p.parseFragment(input, innerHTML)
+        else:
+            try:
+                document = p.parse(input)
+            except constants.DataLossWarning:
+                return 
+    except:
+        errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
+                               u"\nTraceback:", traceback.format_exc()])
+        assert False, errorMsg.encode("utf8")
+
+    output = convertTreeDump(p.tree.testSerializer(document))
+
+    expected = convertExpected(expected)
+    if namespaceHTMLElements:
+        expected = namespaceExpected(r"\1<html \2>", expected)
+
+    errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
+                           u"\nReceived:", output])
+    assert expected == output, errorMsg.encode("utf8")
+    errStr = [u"Line: %i Col: %i %s"%(line, col, 
+                                      constants.E[errorcode] % datavars if isinstance(datavars, dict) else (datavars,)) for
+              ((line,col), errorcode, datavars) in p.errors]
+
+    errorMsg2 = u"\n".join([u"\n\nInput:", input,
+                            u"\nExpected errors (" + str(len(errors)) + u"):\n" + u"\n".join(errors),
+                            u"\nActual errors (" + str(len(p.errors)) + u"):\n" + u"\n".join(errStr)])
+    if checkParseErrors:
+            assert len(p.errors) == len(errors), errorMsg2.encode("utf-8")
+
+def test_parser():
+    sys.stderr.write('Testing tree builders '+ " ".join(treeTypes.keys()) + "\n")
+    files = data_files('tree-construction')
+    
+    for filename in files:
+        testName = os.path.basename(filename).replace(".dat","")
+
+        tests = TestData(filename, "data")
+        
+        for index, test in enumerate(tests):
+            input, errors, innerHTML, expected = [test[key] for key in
+                                                      'data', 'errors',
+                                                      'document-fragment',
+                                                      'document']
+            if errors:
+                errors = errors.split("\n")
+
+            for treeName, treeCls in treeTypes.iteritems():
+                for namespaceHTMLElements in (True, False):
+                    print input
+                    yield (runParserTest, innerHTML, input, expected, errors, treeCls,
+                           namespaceHTMLElements)
+                    break
+                
+                
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
new file mode 100755
index 00000000..3e1c80c3
--- /dev/null
+++ b/html5lib/tests/test_parser2.py
@@ -0,0 +1,39 @@
+import support
+from html5lib import html5parser
+from html5lib.constants import namespaces
+from html5lib.treebuilders import dom
+
+import unittest
+
+# tests that aren't autogenerated from text files
+class MoreParserTests(unittest.TestCase):
+
+  def test_assertDoctypeCloneable(self):
+    parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
+    doc = parser.parse('<!DOCTYPE HTML>')
+    self.assert_(doc.cloneNode(True))
+
+  def test_line_counter(self):
+    # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
+    parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
+    parser.parse("<pre>\nx\n&gt;\n</pre>")
+
+  def test_namespace_html_elements_0(self): 
+    parser = html5parser.HTMLParser(namespaceHTMLElements=True)
+    doc = parser.parse("<html></html>")
+    self.assert_(doc.childNodes[0].namespace == namespaces["html"])
+
+  def test_namespace_html_elements_1(self): 
+    parser = html5parser.HTMLParser(namespaceHTMLElements=False)
+    doc = parser.parse("<html></html>")
+    self.assert_(doc.childNodes[0].namespace == None)
+
+def buildTestSuite():
+  return unittest.defaultTestLoader.loadTestsFromName(__name__)
+
+def main():
+    buildTestSuite()
+    unittest.main()
+
+if __name__ == '__main__':
+    main()
diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
new file mode 100644
index 00000000..aabed780
--- /dev/null
+++ b/html5lib/tests/test_sanitizer.py
@@ -0,0 +1,76 @@
+import os
+import sys
+import unittest
+
+try:
+    import json
+except ImportError:
+    import simplejson as json
+
+from html5lib import html5parser, sanitizer, constants
+
+def runSanitizerTest(name, expected, input):
+    expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
+                         parseFragment(expected).childNodes])
+    expected = json.loads(json.dumps(expected))
+    assert expected == sanitize_html(input)
+
+def sanitize_html(stream):
+    return ''.join([token.toxml() for token in
+                    html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
+                     parseFragment(stream).childNodes])
+
+def test_should_handle_astral_plane_characters():
+    assert u"<p>\U0001d4b5 \U0001d538</p>" == sanitize_html("<p>&#x1d4b5; &#x1d538;</p>")
+
+def test_sanitizer():
+    """Yield runSanitizerTest cases for allowed elements, attributes, protocols."""
+    for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
+        if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
+            continue ### TODO
+        if tag_name != tag_name.lower(): continue ### TODO
+        if tag_name == 'image':
+            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+              "<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
+              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+        elif tag_name == 'br':
+            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+              "<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
+              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+        elif tag_name in constants.voidElements:
+            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+              "<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
+              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+        else:
+            yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
+              "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name,tag_name),
+              "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+
+    for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
+        tag_name = tag_name.upper()
+        yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
+          "&lt;%s title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/%s&gt;" % (tag_name,tag_name),
+          "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
+
+    for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
+        if attribute_name != attribute_name.lower(): continue ### TODO
+        if attribute_name == 'style': continue
+        yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
+          "<p %s=\"foo\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>" % attribute_name,
+          "<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
+
+    for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
+        attribute_name = attribute_name.upper()
+        yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
+          "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>",
+          "<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
+
+    for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
+        yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
+          "<a href=\"%s\">foo</a>" % protocol,
+          """<a href="%s">foo</a>""" % protocol)
+    # Same check with the scheme upper-cased, as the test name promises.
+    for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
+        yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
+          "<a href=\"%s\">foo</a>" % protocol.upper(),
+          """<a href="%s">foo</a>""" % protocol.upper())
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
new file mode 100644
index 00000000..132620b1
--- /dev/null
+++ b/html5lib/tests/test_serializer.py
@@ -0,0 +1,180 @@
+import os
+import unittest
+from support import html5lib_test_files
+
+try:
+    import json
+except ImportError:
+    import simplejson as json
+
+import html5lib
+from html5lib import html5parser, serializer, constants
+from html5lib.treewalkers._base import TreeWalker
+
+optionals_loaded = []
+
+try:
+    from lxml import etree
+    optionals_loaded.append("lxml")
+except ImportError:
+    pass
+
+default_namespace = constants.namespaces["html"]
+
+class JsonWalker(TreeWalker):
+    def __iter__(self):
+        for token in self.tree:
+            type = token[0]
+            if type == "StartTag":
+                if len(token) == 4:
+                    namespace, name, attrib = token[1:4]
+                else:
+                    namespace = default_namespace
+                    name, attrib = token[1:3]
+                yield self.startTag(namespace, name, self._convertAttrib(attrib))
+            elif type == "EndTag":
+                if len(token) == 3:
+                    namespace, name = token[1:3]
+                else:
+                    namespace = default_namespace
+                    name = token[1]
+                yield self.endTag(namespace, name)
+            elif type == "EmptyTag":
+                if len(token) == 4:
+                    namespace, name, attrib = token[1:]
+                else:
+                    namespace = default_namespace
+                    name, attrib = token[1:]
+                for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
+                    yield token
+            elif type == "Comment":
+                yield self.comment(token[1])
+            elif type in ("Characters", "SpaceCharacters"):
+                for token in self.text(token[1]):
+                    yield token
+            elif type == "Doctype":
+                if len(token) == 4:
+                    yield self.doctype(token[1], token[2], token[3])
+                elif len(token) == 3:
+                    yield self.doctype(token[1], token[2])
+                else:
+                    yield self.doctype(token[1])
+            else:
+                raise ValueError("Unknown token type: " + type)
+    
+    def _convertAttrib(self, attribs):
+        """html5lib tree-walkers use a dict of (namespace, name): value for
+        attributes, but JSON cannot represent this. Convert from the format
+        in the serializer tests (a list of dicts with "namespace", "name",
+        and "value" as keys) to html5lib's tree-walker format."""
+        attrs = {}
+        for attrib in attribs:
+            name = (attrib["namespace"], attrib["name"])
+            assert(name not in attrs)
+            attrs[name] = attrib["value"]
+        return attrs
+
+
+def serialize_html(input, options):
+    options = dict([(str(k),v) for k,v in options.iteritems()])
+    return serializer.HTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
+
+def serialize_xhtml(input, options):
+    options = dict([(str(k),v) for k,v in options.iteritems()])
+    return serializer.XHTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
+
+def make_test(input, expected, xhtml, options):
+    result = serialize_html(input, options)
+    if len(expected) == 1:
+        assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0], result, str(options))
+    elif result not in expected:
+        assert False, "Expected: %s, Received: %s" % (expected, result)
+
+    if not xhtml:
+        return
+
+    result = serialize_xhtml(input, options)
+    if len(xhtml) == 1:
+        assert xhtml[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:True\n%s"%(xhtml[0], result, str(options))
+    elif result not in xhtml:
+        assert False, "Expected: %s, Received: %s" % (xhtml, result)
+
+
+class EncodingTestCase(unittest.TestCase):
+    def throwsWithLatin1(self, input):
+        self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"})
+
+    def testDoctypeName(self):
+        self.throwsWithLatin1([["Doctype", u"\u0101"]])
+
+    def testDoctypePublicId(self):
+        self.throwsWithLatin1([["Doctype", u"potato", u"\u0101"]])
+
+    def testDoctypeSystemId(self):
+        self.throwsWithLatin1([["Doctype", u"potato", u"potato", u"\u0101"]])
+
+    def testCdataCharacters(self):
+        self.assertEquals("<style>&amacr;", serialize_html([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}],
+                                                            ["Characters", u"\u0101"]],
+                                                           {"encoding": "iso-8859-1"}))
+
+    def testCharacters(self):
+        self.assertEquals("&amacr;", serialize_html([["Characters", u"\u0101"]],
+                                                    {"encoding": "iso-8859-1"}))
+
+    def testStartTagName(self):
+        self.throwsWithLatin1([["StartTag", u"http://www.w3.org/1999/xhtml", u"\u0101", []]])
+
+    def testEmptyTagName(self):
+        self.throwsWithLatin1([["EmptyTag", u"http://www.w3.org/1999/xhtml", u"\u0101", []]])
+
+    def testAttributeName(self):
+        self.throwsWithLatin1([["StartTag", u"http://www.w3.org/1999/xhtml", u"span", [{"namespace": None, "name": u"\u0101", "value": u"potato"}]]])
+
+    def testAttributeValue(self):
+        self.assertEquals("<span potato=&amacr;>", serialize_html([["StartTag", u"http://www.w3.org/1999/xhtml", u"span",
+                                                                    [{"namespace": None, "name": u"potato", "value": u"\u0101"}]]],
+                                                                  {"encoding": "iso-8859-1"}))
+
+    def testEndTagName(self):
+        self.throwsWithLatin1([["EndTag", u"http://www.w3.org/1999/xhtml", u"\u0101"]])
+
+    def testComment(self):
+        self.throwsWithLatin1([["Comment", u"\u0101"]])
+
+
+if "lxml" in optionals_loaded:
+    class LxmlTestCase(unittest.TestCase):
+        def setUp(self):
+            self.parser = etree.XMLParser(resolve_entities=False)
+            self.treewalker = html5lib.getTreeWalker("lxml")
+            self.serializer = serializer.HTMLSerializer()
+
+        def testEntityReplacement(self):
+            doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
+            tree = etree.fromstring(doc, parser = self.parser).getroottree()
+            result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
+            self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result)
+
+        def testEntityXML(self):
+            doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>"""
+            tree = etree.fromstring(doc, parser = self.parser).getroottree()
+            result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False)
+            self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result)
+
+        def testEntityNoResolve(self):
+            doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>"""
+            tree = etree.fromstring(doc, parser = self.parser).getroottree()
+            result = serializer.serialize(tree, tree="lxml", omit_optional_tags=False,
+                                          resolve_entities=False)
+            self.assertEquals(u"""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result)
+
+def test_serializer():
+    for filename in html5lib_test_files('serializer', '*.test'):
+        tests = json.load(file(filename))
+        test_name = os.path.basename(filename).replace('.test','')
+        for index, test in enumerate(tests['tests']):
+            xhtml = test.get("xhtml", test["expected"])
+            if test_name == 'optionaltags': 
+                xhtml = None
+            yield make_test, test["input"], test["expected"], xhtml, test.get("options", {})
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
new file mode 100755
index 00000000..3a266011
--- /dev/null
+++ b/html5lib/tests/test_stream.py
@@ -0,0 +1,97 @@
+import support
+import unittest, codecs
+
+from html5lib.inputstream import HTMLInputStream
+
+class HTMLInputStreamShortChunk(HTMLInputStream):
+    _defaultChunkSize = 2
+
+class HTMLInputStreamTest(unittest.TestCase):
+
+    def test_char_ascii(self):
+        stream = HTMLInputStream("'", encoding='ascii')
+        self.assertEquals(stream.charEncoding[0], 'ascii')
+        self.assertEquals(stream.char(), "'")
+
+    def test_char_null(self):
+        stream = HTMLInputStream("\x00")
+        self.assertEquals(stream.char(), u'\ufffd')
+
+    def test_char_utf8(self):
+        stream = HTMLInputStream(u'\u2018'.encode('utf-8'), encoding='utf-8')
+        self.assertEquals(stream.charEncoding[0], 'utf-8')
+        self.assertEquals(stream.char(), u'\u2018')
+
+    def test_char_win1252(self):
+        stream = HTMLInputStream(u"\xa9\xf1\u2019".encode('windows-1252'))
+        self.assertEquals(stream.charEncoding[0], 'windows-1252')
+        self.assertEquals(stream.char(), u"\xa9")
+        self.assertEquals(stream.char(), u"\xf1")
+        self.assertEquals(stream.char(), u"\u2019")
+
+    def test_bom(self):
+        stream = HTMLInputStream(codecs.BOM_UTF8 + "'")
+        self.assertEquals(stream.charEncoding[0], 'utf-8')
+        self.assertEquals(stream.char(), "'")
+
+    def test_utf_16(self):
+        stream = HTMLInputStream((' '*1025).encode('utf-16'))
+        self.assert_(stream.charEncoding[0] in ['utf-16-le', 'utf-16-be'], stream.charEncoding)
+        self.assertEquals(len(stream.charsUntil(' ', True)), 1025)
+
+    def test_newlines(self):
+        stream = HTMLInputStreamShortChunk(codecs.BOM_UTF8 + "a\nbb\r\nccc\rddddxe")
+        self.assertEquals(stream.position(), (1, 0))
+        self.assertEquals(stream.charsUntil('c'), u"a\nbb\n")
+        self.assertEquals(stream.position(), (3, 0))
+        self.assertEquals(stream.charsUntil('x'), u"ccc\ndddd")
+        self.assertEquals(stream.position(), (4, 4))
+        self.assertEquals(stream.charsUntil('e'), u"x")
+        self.assertEquals(stream.position(), (4, 5))
+
+    def test_newlines2(self):
+        size = HTMLInputStream._defaultChunkSize
+        stream = HTMLInputStream("\r" * size + "\n")
+        self.assertEquals(stream.charsUntil('x'), "\n" * size)
+
+    def test_position(self):
+        stream = HTMLInputStreamShortChunk(codecs.BOM_UTF8 + "a\nbb\nccc\nddde\nf\ngh")
+        self.assertEquals(stream.position(), (1, 0))
+        self.assertEquals(stream.charsUntil('c'), u"a\nbb\n")
+        self.assertEquals(stream.position(), (3, 0))
+        stream.unget(u"\n")
+        self.assertEquals(stream.position(), (2, 2))
+        self.assertEquals(stream.charsUntil('c'), u"\n")
+        self.assertEquals(stream.position(), (3, 0))
+        stream.unget(u"\n")
+        self.assertEquals(stream.position(), (2, 2))
+        self.assertEquals(stream.char(), u"\n")
+        self.assertEquals(stream.position(), (3, 0))
+        self.assertEquals(stream.charsUntil('e'), u"ccc\nddd")
+        self.assertEquals(stream.position(), (4, 3))
+        self.assertEquals(stream.charsUntil('h'), u"e\nf\ng")
+        self.assertEquals(stream.position(), (6, 1))
+
+    def test_position2(self):
+        stream = HTMLInputStreamShortChunk("abc\nd")
+        self.assertEquals(stream.position(), (1, 0))
+        self.assertEquals(stream.char(), u"a")
+        self.assertEquals(stream.position(), (1, 1))
+        self.assertEquals(stream.char(), u"b")
+        self.assertEquals(stream.position(), (1, 2))
+        self.assertEquals(stream.char(), u"c")
+        self.assertEquals(stream.position(), (1, 3))
+        self.assertEquals(stream.char(), u"\n")
+        self.assertEquals(stream.position(), (2, 0))
+        self.assertEquals(stream.char(), u"d")
+        self.assertEquals(stream.position(), (2, 1))
+
+def buildTestSuite():
+    return unittest.defaultTestLoader.loadTestsFromName(__name__)
+
+def main():
+    buildTestSuite()
+    unittest.main()
+
+if __name__ == '__main__':
+    main()
diff --git a/html5lib/tests/test_tokenizer.py b/html5lib/tests/test_tokenizer.py
new file mode 100644
index 00000000..1b76806a
--- /dev/null
+++ b/html5lib/tests/test_tokenizer.py
@@ -0,0 +1,193 @@
+import sys
+import os
+import unittest
+import cStringIO
+import warnings
+import re
+
+try:
+    import json
+except ImportError:
+    import simplejson as json
+
+from support import html5lib_test_files
+from html5lib.tokenizer import HTMLTokenizer
+from html5lib import constants
+
+class TokenizerTestParser(object):
+    def __init__(self, initialState, lastStartTag=None):
+        self.tokenizer = HTMLTokenizer
+        self._state = initialState
+        self._lastStartTag = lastStartTag
+
+    def parse(self, stream, encoding=None, innerHTML=False):
+        tokenizer = self.tokenizer(stream, encoding)
+        self.outputTokens = []
+
+        tokenizer.state = getattr(tokenizer, self._state)
+        if self._lastStartTag is not None:
+            tokenizer.currentToken = {"type": "startTag", 
+                                      "name":self._lastStartTag}
+
+        types = dict((v,k) for k,v in constants.tokenTypes.iteritems())
+        for token in tokenizer:
+            getattr(self, 'process%s' % types[token["type"]])(token)
+
+        return self.outputTokens
+
+    def processDoctype(self, token):
+        self.outputTokens.append([u"DOCTYPE", token["name"], token["publicId"],
+                                  token["systemId"], token["correct"]])
+
+    def processStartTag(self, token):
+        self.outputTokens.append([u"StartTag", token["name"], 
+                                  dict(token["data"][::-1]), token["selfClosing"]])
+
+    def processEmptyTag(self, token):
+        if token["name"] not in constants.voidElements:
+            self.outputTokens.append(u"ParseError")
+        self.outputTokens.append([u"StartTag", token["name"], dict(token["data"][::-1])])
+
+    def processEndTag(self, token):
+        self.outputTokens.append([u"EndTag", token["name"], 
+                                  token["selfClosing"]])
+
+    def processComment(self, token):
+        self.outputTokens.append([u"Comment", token["data"]])
+
+    def processSpaceCharacters(self, token):
+        self.outputTokens.append([u"Character", token["data"]])
+        self.processSpaceCharacters = self.processCharacters
+
+    def processCharacters(self, token):
+        self.outputTokens.append([u"Character", token["data"]])
+
+    def processEOF(self, token):
+        pass
+
+    def processParseError(self, token):
+        self.outputTokens.append([u"ParseError", token["data"]])
+
+def concatenateCharacterTokens(tokens):
+    """Merge runs of Character tokens into copies, leaving the input intact."""
+    outputTokens = []
+    for token in tokens:
+        if "ParseError" not in token and token[0] == "Character":
+            if (outputTokens and "ParseError" not in outputTokens[-1] and
+                outputTokens[-1][0] == "Character"):
+                outputTokens[-1][1] += token[1]
+                continue
+            token = list(token)  # copy: += above must not edit caller's data
+        outputTokens.append(token)
+    return outputTokens
+
+def normalizeTokens(tokens):
+    # TODO: convert tests to reflect arrays
+    for i, token in enumerate(tokens):
+        if token[0] == u'ParseError':
+            tokens[i] = token[0]
+    return tokens
+
+def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder,
+                ignoreErrors=False):
+    """Return True when receivedTokens matches expectedTokens.
+
+    With ignoreErrorOrder the relative position of "ParseError" entries is
+    not compared; with ignoreErrors they are dropped altogether.  The last
+    item of received StartTag/EndTag tokens is popped in place when unused.
+    """
+    # Only compare the trailing selfClosing item if the expectations have it
+    checkSelfClosing = any(
+        token[0] == "StartTag" and len(token) == 4 or
+        token[0] == "EndTag" and len(token) == 3
+        for token in expectedTokens)
+
+    if not checkSelfClosing:
+        for token in receivedTokens:
+            if token[0] in ("StartTag", "EndTag"):
+                token.pop()
+
+    if not ignoreErrorOrder and not ignoreErrors:
+        return expectedTokens == receivedTokens
+
+    # Split each list into [non-errors, parse errors]; label/list pairs are
+    # spelled out because zipping over dict.keys() relies on unspecified order.
+    tokens = {"expected": [[], []], "received": [[], []]}
+    for tokenType, tokenList in (("expected", expectedTokens),
+                                 ("received", receivedTokens)):
+        for token in tokenList:
+            if token != "ParseError":
+                tokens[tokenType][0].append(token)
+            elif not ignoreErrors:
+                tokens[tokenType][1].append(token)
+    return tokens["expected"] == tokens["received"]
+
+def unescape_test(test):
+    """Decode the unicode-escape sequences of a doubleEscaped test in place."""
+    def decode(inp):
+        return inp.decode("unicode-escape")
+    test["input"] = decode(test["input"])
+    for token in test["output"]:
+        if token == "ParseError":
+            continue
+        token[1] = decode(token[1])
+        # Only attribute dicts are re-keyed; snapshot items() as the dict changes.
+        if len(token) > 2 and isinstance(token[2], dict):
+            for key, value in list(token[2].items()):
+                del token[2][key]
+                token[2][decode(key)] = decode(value)
+    return test
+
+
+def runTokenizerTest(test):
+    """Tokenize test['input'] and assert the result matches test['output']."""
+    #XXX - move this out into the setup function
+    if 'doubleEscaped' in test:
+        test = unescape_test(test)
+    #concatenate all consecutive character tokens into a single token
+    expected = concatenateCharacterTokens(test['output'])
+    test.setdefault('lastStartTag', None)
+    parser = TokenizerTestParser(test['initialState'], test['lastStartTag'])
+    # Capture stdout while tokenizing, but always put the real stream back.
+    stdout, sys.stdout = sys.stdout, cStringIO.StringIO()
+    try:
+        tokens = parser.parse(test['input'])
+    finally:
+        sys.stdout = stdout
+    tokens = concatenateCharacterTokens(tokens)
+    received = normalizeTokens(tokens)
+    errorMsg = u"\n".join(["\n\nInitial state:",
+                          test['initialState'],
+                          "\nInput:", unicode(test['input']),
+                          "\nExpected:", unicode(expected),
+                          "\nreceived:", unicode(tokens)])
+    errorMsg = errorMsg.encode("utf-8")
+    ignoreErrorOrder = test.get('ignoreErrorOrder', False)
+    assert tokensMatch(expected, received, ignoreErrorOrder), errorMsg
+
+
+def _doCapitalize(match):
+    return match.group(1).upper()
+
+_capitalizeRe = re.compile(r"\W+(\w)").sub
+
+def capitalize(s):
+    s = s.lower()
+    s = _capitalizeRe(_doCapitalize, s)
+    return s
+
+
+def test_tokenizer():
+    """Yield (runTokenizerTest, test) for every test and initial state."""
+    for filename in html5lib_test_files('tokenizer', '*.test'):
+        fp = open(filename)
+        try:
+            tests = json.load(fp)
+        finally:
+            fp.close()  # do not leak one handle per test file
+        for test in tests.get('tests', ()):
+            # Tests that name no initial state run from the data state
+            for initialState in test.setdefault("initialStates", ["Data state"]):
+                test["initialState"] = capitalize(initialState)
+                yield runTokenizerTest, test
+
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
new file mode 100644
index 00000000..4b558bdd
--- /dev/null
+++ b/html5lib/tests/test_treewalkers.py
@@ -0,0 +1,311 @@
+import os
+import sys
+import StringIO
+import unittest
+import warnings
+
+warnings.simplefilter("error")
+
+from support import html5lib_test_files, TestData, convertExpected
+
+from html5lib import html5parser, treewalkers, treebuilders, constants
+from html5lib.filters.lint import Filter as LintFilter, LintError
+
+def PullDOMAdapter(node):
+    from xml.dom import Node
+    from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS
+
+    if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
+        for childNode in node.childNodes:
+            for event in PullDOMAdapter(childNode):
+                yield event
+
+    elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
+        raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM")
+
+    elif node.nodeType == Node.COMMENT_NODE:
+        yield COMMENT, node
+
+    elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
+        yield CHARACTERS, node
+
+    elif node.nodeType == Node.ELEMENT_NODE:
+        yield START_ELEMENT, node
+        for childNode in node.childNodes:
+            for event in PullDOMAdapter(childNode):
+                yield event
+        yield END_ELEMENT, node
+
+    else:
+        raise NotImplementedError("Node type not supported: " + str(node.nodeType))
+
+treeTypes = {
+"simpletree":  {"builder": treebuilders.getTreeBuilder("simpletree"),
+                "walker":  treewalkers.getTreeWalker("simpletree")},
+"DOM":         {"builder": treebuilders.getTreeBuilder("dom"),
+                "walker":  treewalkers.getTreeWalker("dom")},
+"PullDOM":     {"builder": treebuilders.getTreeBuilder("dom"),
+                "adapter": PullDOMAdapter,
+                "walker":  treewalkers.getTreeWalker("pulldom")},
+}
+
+#Try whatever etree implementations are available from a list that are
+#"supposed" to work
+try:
+    import xml.etree.ElementTree as ElementTree
+    treeTypes['ElementTree'] = \
+        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
+         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
+except ImportError:
+    try:
+        import elementtree.ElementTree as ElementTree
+        treeTypes['ElementTree'] = \
+            {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
+             "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
+    except ImportError:
+        pass
+
+try:
+    import xml.etree.cElementTree as ElementTree
+    treeTypes['cElementTree'] = \
+        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
+         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
+except ImportError:
+    try:
+        import cElementTree as ElementTree
+        treeTypes['cElementTree'] = \
+            {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
+             "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
+    except ImportError:
+        pass
+
+try:
+    import lxml.etree as ElementTree
+#    treeTypes['lxml_as_etree'] = \
+#        {"builder": treebuilders.getTreeBuilder("etree", ElementTree),
+#         "walker":  treewalkers.getTreeWalker("etree", ElementTree)}
+    treeTypes['lxml_native'] = \
+        {"builder": treebuilders.getTreeBuilder("lxml"),
+         "walker":  treewalkers.getTreeWalker("lxml")}
+except ImportError:
+    pass
+
+try:
+    import BeautifulSoup
+    treeTypes["beautifulsoup"] = \
+        {"builder": treebuilders.getTreeBuilder("beautifulsoup"),
+         "walker":  treewalkers.getTreeWalker("beautifulsoup")}
+except ImportError:
+    pass
+    
+#Try the pxdom DOM implementation, if it is installed, through the generic
+#"dom" tree builder and tree walker
+try:
+    import pxdom
+    treeTypes['pxdom'] = \
+        {"builder": treebuilders.getTreeBuilder("dom", pxdom),
+         "walker":  treewalkers.getTreeWalker("dom")}
+except ImportError:
+    pass
+
+try:
+    from genshi.core import QName, Attrs
+    from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
+
+    def GenshiAdapter(tree):
+        text = None
+        for token in treewalkers.getTreeWalker("simpletree")(tree):
+            type = token["type"]
+            if type in ("Characters", "SpaceCharacters"):
+                if text is None:
+                    text = token["data"]
+                else:
+                    text += token["data"]
+            elif text is not None:
+                yield TEXT, text, (None, -1, -1)
+                text = None
+
+            if type in ("StartTag", "EmptyTag"):
+                if token["namespace"]:
+                    name = u"{%s}%s" % (token["namespace"], token["name"])
+                else:
+                    name = token["name"]
+                yield (START,
+                       (QName(name),
+                        Attrs([(QName(attr),value) for attr,value in token["data"]])),
+                       (None, -1, -1))
+                if type == "EmptyTag":
+                    type = "EndTag"
+
+            if type == "EndTag":
+                yield END, QName(token["name"]), (None, -1, -1)
+
+            elif type == "Comment":
+                yield COMMENT, token["data"], (None, -1, -1)
+
+            elif type == "Doctype":
+                yield DOCTYPE, (token["name"], token["publicId"], 
+                                token["systemId"]), (None, -1, -1)
+
+            else:
+                pass # FIXME: What to do?
+
+        if text is not None:
+            yield TEXT, text, (None, -1, -1)
+
+    #treeTypes["genshi"] = \
+    #    {"builder": treebuilders.getTreeBuilder("simpletree"),
+    #     "adapter": GenshiAdapter,
+    #     "walker":  treewalkers.getTreeWalker("genshi")}
+except ImportError:
+    pass
+
+def concatenateCharacterTokens(tokens):
+    charactersToken = None
+    for token in tokens:
+        type = token["type"]
+        if type in ("Characters", "SpaceCharacters"):
+            if charactersToken is None:
+                charactersToken = {"type": "Characters", "data": token["data"]}
+            else:
+                charactersToken["data"] += token["data"]
+        else:
+            if charactersToken is not None:
+                yield charactersToken
+                charactersToken = None
+            yield token
+    if charactersToken is not None:
+        yield charactersToken
+
+def convertTokens(tokens):
+    """Render a walker token stream as an indented, one-item-per-line dump.
+
+    Tags, attributes, comments, doctypes and text each produce a line, with
+    two spaces of indent per nesting level; other token types are skipped.
+    """
+    output = []
+    indent = 0
+    for token in concatenateCharacterTokens(tokens):
+        kind = token["type"]  # local name chosen to avoid shadowing ``type``
+        if kind in ("StartTag", "EmptyTag"):
+            namespace = token["namespace"]
+            if namespace and namespace != constants.namespaces["html"]:
+                # Non-HTML elements are shown as "<prefix-or-namespace> <name>".
+                name = constants.prefixes.get(namespace, namespace)
+                name += u" " + token["name"]
+            else:
+                name = token["name"]
+            output.append(u"%s<%s>" % (" "*indent, name))
+            indent += 2
+            attrs = token["data"]
+            if attrs:
+                #TODO: Remove this if statement, attrs should always exist
+                for (attr_ns, attr_name), value in sorted(attrs.items()):
+                    if attr_ns:
+                        outputname = constants.prefixes.get(attr_ns, attr_ns)
+                        outputname += u" " + attr_name
+                    else:
+                        outputname = attr_name
+                    output.append(u"%s%s=\"%s\"" % (" "*indent, outputname, value))
+            if kind == "EmptyTag":
+                indent -= 2
+        elif kind == "EndTag":
+            indent -= 2
+        elif kind == "Comment":
+            output.append("%s<!-- %s -->" % (" "*indent, token["data"]))
+        elif kind == "Doctype":
+            if token["name"]:
+                if token["publicId"]:
+                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
+                                  (" "*indent, token["name"],
+                                   token["publicId"],
+                                   token["systemId"] or ""))
+                elif token["systemId"]:
+                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
+                                  (" "*indent, token["name"],
+                                   token["systemId"]))
+                else:
+                    output.append("%s<!DOCTYPE %s>" % (" "*indent,
+                                                       token["name"]))
+            else:
+                output.append("%s<!DOCTYPE >" % (" "*indent,))
+        elif kind in ("Characters", "SpaceCharacters"):
+            output.append("%s\"%s\"" % (" "*indent, token["data"]))
+        else:
+            pass # TODO: what to do with errors?
+    return u"\n".join(output)
+
+import re
+attrlist = re.compile(r"^(\s+)\w+=.*(\n\1\w+=.*)+",re.M)
+def sortattrs(x):
+  """Return the text matched by ``x`` with its lines sorted; used as the
+  replacement callback for ``attrlist.sub``."""
+  return "\n".join(sorted(x.group(0).split("\n")))
+
+
+class TokenTestCase(unittest.TestCase):
+    def test_all_tokens(self):
+        """Every tree walker must emit this token sequence for a small document."""
+        expected = [
+            {'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'html'},
+            {'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'head'},
+            {'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'head'},
+            {'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'body'},
+            {'data': u'a', 'type': 'Characters'},
+            {'data': {}, 'type': 'StartTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'div'},
+            {'data': u'b', 'type': 'Characters'},
+            {'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'div'},
+            {'data': u'c', 'type': 'Characters'},
+            {'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'body'},
+            {'data': {}, 'type': 'EndTag', 'namespace': u'http://www.w3.org/1999/xhtml', 'name': u'html'}]
+        for treeName, treeCls in treeTypes.iteritems():
+            p = html5parser.HTMLParser(tree = treeCls["builder"])
+            document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
+            document = treeCls.get("adapter", lambda x: x)(document)
+            output = treeCls["walker"](document)
+            for expectedToken, outputToken in zip(expected, output):  # NOTE(review): zip() ignores surplus tokens on either side
+                self.assertEqual(expectedToken, outputToken)  # assertEquals is a deprecated alias
+
+def run_test(innerHTML, input, expected, errors, treeClass):
+    """Parse ``input`` (as a fragment if ``innerHTML`` is set), walk the tree and
+    assert the serialized tokens match ``expected``; ``errors`` is unused."""
+    try:
+        parser = html5parser.HTMLParser(tree = treeClass["builder"])
+        stream = StringIO.StringIO(input)
+        if innerHTML:
+            document = parser.parseFragment(stream, innerHTML)
+        else:
+            document = parser.parse(stream)
+    except constants.DataLossWarning:
+        return  # Ignore testcases we know we don't pass
+    document = treeClass.get("adapter", lambda x: x)(document)
+    try:
+        walker = treeClass["walker"](document)
+        output = attrlist.sub(sortattrs, convertTokens(walker))
+        expected = attrlist.sub(sortattrs, convertExpected(expected))
+        assert expected == output, "\n".join([
+                "", "Input:", input,
+                "", "Expected:", expected,
+                "", "Received:", output])
+    except NotImplementedError:
+        pass # Amnesty for those that confess...
+            
+def test_treewalker():
+    """Generate one ``run_test`` call per tree type and tree-construction test.
+
+    Yields ``(run_test, innerHTML, input, expected, errors, treeCls)`` tuples,
+    where ``errors`` is the test's error text split into lines.
+    """
+    sys.stdout.write('Testing tree walkers '+ " ".join(treeTypes.keys()) + "\n")
+
+    for treeCls in treeTypes.values():
+        # Re-list the files for every tree type (the helper's return value
+        # may be a one-shot iterable, so it is not hoisted out of the loop).
+        for filename in html5lib_test_files('tree-construction'):
+            for test in TestData(filename, "data"):
+                input, errors = test["data"], test["errors"]
+                innerHTML, expected = test["document-fragment"], test["document"]
+                yield (run_test, innerHTML, input, expected,
+                       errors.split("\n"), treeCls)
+
+
diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py
new file mode 100644
index 00000000..96a39dbc
--- /dev/null
+++ b/html5lib/tests/test_whitespace_filter.py
@@ -0,0 +1,123 @@
+import unittest
+
+from html5lib.filters.whitespace import Filter
+from html5lib.constants import spaceCharacters
+spaceCharacters = u"".join(spaceCharacters)
+
+class TestCase(unittest.TestCase):
+    """Tests for ``html5lib.filters.whitespace.Filter``."""
+
+    def runTest(self, input, expected):
+        """Assert that filtering ``input`` yields exactly ``expected``."""
+        output = list(Filter(input))
+        errorMsg = "\n".join(["\n\nInput:", str(input), "\nExpected:",
+                              str(expected), "\nReceived:", str(output)])
+        self.assertEqual(output, expected, errorMsg)
+
+    def runTestUnmodifiedOutput(self, input):
+        """Assert the filter leaves ``input`` as-is, judged against a snapshot
+        (comparing ``input`` with itself would hide in-place token edits)."""
+        import copy
+        self.runTest(input, copy.deepcopy(input))
+
+    def testPhrasingElements(self):
+        self.runTestUnmodifiedOutput(
+            [{"type": u"Characters", "data": u"This is a " },
+             {"type": u"StartTag", "name": u"span", "data": [] },
+             {"type": u"Characters", "data": u"phrase" },
+             {"type": u"EndTag", "name": u"span", "data": []},
+             {"type": u"SpaceCharacters", "data": u" " },
+             {"type": u"Characters", "data": u"with" },
+             {"type": u"SpaceCharacters", "data": u" " },
+             {"type": u"StartTag", "name": u"em", "data": [] },
+             {"type": u"Characters", "data": u"emphasised text" },
+             {"type": u"EndTag", "name": u"em", "data": []},
+             {"type": u"Characters", "data": u" and an " },
+             {"type": u"StartTag", "name": u"img", "data": [[u"alt", u"image"]] },
+             {"type": u"Characters", "data": u"." }])
+
+    def testLeadingWhitespace(self):
+        self.runTest([{"type": u"StartTag", "name": u"p", "data": []},
+                      {"type": u"SpaceCharacters", "data": spaceCharacters},
+                      {"type": u"Characters", "data": u"foo"},
+                      {"type": u"EndTag", "name": u"p", "data": []}],
+                     [{"type": u"StartTag", "name": u"p", "data": []},
+                      {"type": u"SpaceCharacters", "data": u" "},
+                      {"type": u"Characters", "data": u"foo"},
+                      {"type": u"EndTag", "name": u"p", "data": []}])
+
+    def testLeadingWhitespaceAsCharacters(self):
+        self.runTest([{"type": u"StartTag", "name": u"p", "data": []},
+                      {"type": u"Characters", "data": spaceCharacters + u"foo"},
+                      {"type": u"EndTag", "name": u"p", "data": []}],
+                     [{"type": u"StartTag", "name": u"p", "data": []},
+                      {"type": u"Characters", "data": u" foo"},
+                      {"type": u"EndTag", "name": u"p", "data": []}])
+
+    def testTrailingWhitespace(self):
+        self.runTest([{"type": u"StartTag", "name": u"p", "data": []},
+                      {"type": u"Characters", "data": u"foo"},
+                      {"type": u"SpaceCharacters", "data": spaceCharacters},
+                      {"type": u"EndTag", "name": u"p", "data": []}],
+                     [{"type": u"StartTag", "name": u"p", "data": []},
+                      {"type": u"Characters", "data": u"foo"},
+                      {"type": u"SpaceCharacters", "data": u" "},
+                      {"type": u"EndTag", "name": u"p", "data": []}])
+
+    def testTrailingWhitespaceAsCharacters(self):
+        self.runTest([{"type": u"StartTag", "name": u"p", "data": []},
+                      {"type": u"Characters", "data": u"foo" + spaceCharacters},
+                      {"type": u"EndTag", "name": u"p", "data": []}],
+                     [{"type": u"StartTag", "name": u"p", "data": []},
+                      {"type": u"Characters", "data": u"foo "},
+                      {"type": u"EndTag", "name": u"p", "data": []}])
+
+    def testWhitespace(self):
+        self.runTest([{"type": u"StartTag", "name": u"p", "data": []},
+                      {"type": u"Characters", "data": u"foo" + spaceCharacters + u"bar"},
+                      {"type": u"EndTag", "name": u"p", "data": []}],
+                     [{"type": u"StartTag", "name": u"p", "data": []},
+                      {"type": u"Characters", "data": u"foo bar"},
+                      {"type": u"EndTag", "name": u"p", "data": []}])
+
+    def testLeadingWhitespaceInPre(self):
+        self.runTestUnmodifiedOutput(
+            [{"type": u"StartTag", "name": u"pre", "data": []},
+             {"type": u"SpaceCharacters", "data": spaceCharacters},
+             {"type": u"Characters", "data": u"foo"},
+             {"type": u"EndTag", "name": u"pre", "data": []}])
+
+    def testLeadingWhitespaceAsCharactersInPre(self):
+        self.runTestUnmodifiedOutput(
+            [{"type": u"StartTag", "name": u"pre", "data": []},
+             {"type": u"Characters", "data": spaceCharacters + u"foo"},
+             {"type": u"EndTag", "name": u"pre", "data": []}])
+
+    def testTrailingWhitespaceInPre(self):
+        self.runTestUnmodifiedOutput(
+            [{"type": u"StartTag", "name": u"pre", "data": []},
+             {"type": u"Characters", "data": u"foo"},
+             {"type": u"SpaceCharacters", "data": spaceCharacters},
+             {"type": u"EndTag", "name": u"pre", "data": []}])
+
+    def testTrailingWhitespaceAsCharactersInPre(self):
+        self.runTestUnmodifiedOutput(
+            [{"type": u"StartTag", "name": u"pre", "data": []},
+             {"type": u"Characters", "data": u"foo" + spaceCharacters},
+             {"type": u"EndTag", "name": u"pre", "data": []}])
+
+    def testWhitespaceInPre(self):
+        self.runTestUnmodifiedOutput(
+            [{"type": u"StartTag", "name": u"pre", "data": []},
+             {"type": u"Characters", "data": u"foo" + spaceCharacters + u"bar"},
+             {"type": u"EndTag", "name": u"pre", "data": []}])
+
+def buildTestSuite():  # Load the tests found in this module (looked up by __name__) with the default loader.
+    return unittest.defaultTestLoader.loadTestsFromName(__name__)
+
+def main():
+    """Run this module's tests via the unittest command-line entry point."""
+    unittest.main()  # discovers the tests itself; no pre-built suite is needed
+
+if __name__ == "__main__":
+    main()
diff --git a/html5lib/tests/testdata/encoding/test-yahoo-jp.dat b/html5lib/tests/testdata/encoding/test-yahoo-jp.dat
new file mode 100644
index 00000000..3c25ecb2
--- /dev/null
+++ b/html5lib/tests/testdata/encoding/test-yahoo-jp.dat
@@ -0,0 +1,10 @@
+#data
+<html>
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=euc-jp">
+<!--京-->
+<title>Yahoo! JAPAN</title>
+<meta name="description" content="日本最大級のポータルサイト。検索、オークション、ニュース、メール、コミュニティ、ショッピング、など80以上のサービスを展開。あなたの生活をより豊かにする「ライフ・エンジン」を目指していきます。">
+<style type="text/css" media="all">
+#encoding
+euc_jp
diff --git a/html5lib/tests/testdata/encoding/tests1.dat b/html5lib/tests/testdata/encoding/tests1.dat
new file mode 100644
index 00000000..5b585e73
--- /dev/null
+++ b/html5lib/tests/testdata/encoding/tests1.dat
@@ -0,0 +1,394 @@
+#data
+<!DOCTYPE HTML>
+<!-- (control test - for the other tests to work, this should pass - you may have to set your defaults appropriately) -->
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset="ISO-8859-1">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset='iso8859-2'>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset=iso8859-2>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta
+charset=iso8859-2>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<metacharset=iso8859-2>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type" content="text/html; charset=iso8859-2">
+<!-- XXX this is a tough one, not sure how to do this one, unless we explictly do content= processing -->
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2" http-equiv="Content-Type">
+<!-- XXX this is a tough one, not sure how to do this one, unless we explictly do content= processing -->
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type" content=text/html; charset=iso8859-2>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type content="text/html; charset=iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type " content="text/html; charset=iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2" http-equiv="Content-Type ">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Type>" content="text/html; charset=iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2" http-equiv="Content-Type>">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta http-equiv="Content-Style-Type" content="text/html; charset=iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2" http-equiv="Content-Style-Type">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta name="Content-Style-Type" content="text/html; charset=iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2" name="Content-Style-Type">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="text/html; charset=iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content=" text/html; charset = iso8859-2 ">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta content="
+text/html; charset=iso8859-2
+" http-equiv="Content-Type">
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset="
+iso8859-2
+">
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset=
+iso8859-2
+>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset="iso8859-2>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset=iso8859-2">
+<p>"</p>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta " charset=iso8859-2>
+<p>"</p>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta test" charset=iso8859-2>
+<p>"</p>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta test=" charset=iso8859-2>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta test="' charset=iso8859-2>
+<p>"'</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta test='" charset=iso8859-2>
+<p>'"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta test="" charset=iso8859-2>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta test=x" charset=iso8859-2>
+<p>"</p>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>
+<meta test=x" charset=iso8859-2>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>
+<meta test=x charset=iso8859-2>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>
+<meta charset=iso8859-2>
+<p>"</p>
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<head></head><p title="x>">
+<meta charset=iso8859-2>
+<p>"</p>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset="ISO-8859-1">
+<meta charset="iso8859-2">
+#encoding
+Windows-1252
+
+#data
+<!DOCTYPE HTML>
+<meta charset="iso8859-2">
+<meta charset="ISO-8859-1">
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<!--<meta charset="ISO-8859-1">-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<!--<meta charset="iso8859-2">-->
+<meta charset="ISO-8859-1">
+#encoding
+Windows-1252
+
+#data
+﻿<!DOCTYPE HTML>
+<!-- Starts with UTF-8 BOM -->
+#encoding
+UTF-8
+
+#data
+﻿<!DOCTYPE HTML>
+<meta charset="ISO-8859-1">
+<!-- Starts with UTF-8 BOM -->
+#encoding
+UTF-8
+
+#data
+<!-- 511 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!-- 512 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!-- 1024 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!-- 1025 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!-- 2048 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!-- 2049 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxz-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!-- 4096 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!-- 4097 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!-- 8192 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!-- 8193 characters xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz-->
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
+
+#data
+<!-- multi-script test -->
+<script>alert('step 1 of 3 ("�")')</script>
+<!-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -->
+<script>alert('step 2 of 3 ("�")')</script>
+<meta charset="iso8859-2">
+<script>alert('step 3 of 3 ("�")')</script>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<script>document.write('<meta charset="ISO-8859-' + '2">')</script>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<script>document.write('<meta charset="iso8859-2">')</script>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<script type="text/plain"><meta charset="iso8859-2"></script>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<style type="text/plain"><meta charset="iso8859-2"></style>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<p><meta charset="iso8859-2"></p>
+#encoding
+iso8859-2
+
+#data
+<!DOCTYPE HTML>
+<meta charset="bogus">
+<meta charset="iso8859-2">
+#encoding
+iso8859-2
diff --git a/html5lib/tests/testdata/encoding/tests2.dat b/html5lib/tests/testdata/encoding/tests2.dat
new file mode 100644
index 00000000..eee44984
--- /dev/null
+++ b/html5lib/tests/testdata/encoding/tests2.dat
@@ -0,0 +1,115 @@
+#data
+<meta
+#encoding
+windows-1252
+
+#data
+<
+#encoding
+windows-1252
+
+#data
+<!
+#encoding
+windows-1252
+
+#data
+<meta charset = "
+#encoding
+windows-1252
+
+#data
+<meta charset=euc_jp
+#encoding
+windows-1252
+
+#data
+<meta <meta charset='euc_jp'>
+#encoding
+euc_jp
+
+#data
+<meta       charset    =     'euc_jp'>
+#encoding
+euc_jp
+
+#data
+<!-- -->
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+#encoding
+utf-8
+
+#data
+<!-- -->
+<meta http-equiv="Content-Type" content="text/html; charset=utf
+#encoding
+windows-1252
+
+#data
+<meta http-equiv="Content-Type<meta charset="utf-8">
+#encoding
+windows-1252
+
+#data
+<meta http-equiv="Content-Type" content="text/html; charset='utf-8'">
+#encoding
+utf-8
+
+#data
+<meta http-equiv="Content-Type" content="text/html; charset='utf-8">
+#encoding
+windows-1252
+
+#data
+<meta                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 
+#encoding
+windows-1252
+
+#data
+<meta charset                    =                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            
+#encoding
+windows-1252
+
+#data
+<meta charset=                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            utf-8
+>
+#encoding
+utf-8
+
+#data
+<meta content = "text/html;
+#encoding
+windows-1252
+
+#data
+<meta charset="UTF-16">
+#encoding
+utf-8
+
+#data
+<meta charset="UTF-16LE">
+#encoding
+utf-8
+
+#data
+<meta charset="UTF-16BE">
+#encoding
+utf-8
+
+#data
+<html a=ñ>
+<meta charset="utf-8">
+#encoding
+utf-8
+
+#data
+<html ñ>
+<meta charset="utf-8">
+#encoding
+utf-8
+
+#data
+<html>ñ
+<meta charset="utf-8">
+#encoding
+utf-8
diff --git a/html5lib/tests/testdata/sanitizer/tests1.dat b/html5lib/tests/testdata/sanitizer/tests1.dat
new file mode 100644
index 00000000..c741cb8c
--- /dev/null
+++ b/html5lib/tests/testdata/sanitizer/tests1.dat
@@ -0,0 +1,501 @@
+[
+  {
+    "name": "IE_Comments",
+    "input": "<!--[if gte IE 4]><script>alert('XSS');</script><![endif]-->",
+    "output": ""
+  },
+
+  {
+    "name": "IE_Comments_2",
+    "input": "<![if !IE 5]><script>alert('XSS');</script><![endif]>",
+    "output": "&lt;script&gt;alert('XSS');&lt;/script&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "allow_colons_in_path_component",
+    "input": "<a href=\"./this:that\">foo</a>",
+    "output": "<a href='./this:that'>foo</a>"
+  },
+
+  {
+    "name": "background_attribute",
+    "input": "<div background=\"javascript:alert('XSS')\"></div>",
+    "output": "<div/>",
+    "xhtml": "<div></div>",
+    "rexml": "<div></div>"
+  },
+
+  {
+    "name": "bgsound",
+    "input": "<bgsound src=\"javascript:alert('XSS');\" />",
+    "output": "&lt;bgsound src=\"javascript:alert('XSS');\"/&gt;",
+    "rexml": "&lt;bgsound src=\"javascript:alert('XSS');\"&gt;&lt;/bgsound&gt;"
+  },
+
+  {
+    "name": "div_background_image_unicode_encoded",
+    "input": "<div style=\"background-image:\u00a5\u00a2\u006C\u0028'\u006a\u0061\u00a6\u0061\u00a3\u0063\u00a2\u0069\u00a0\u00a4\u003a\u0061\u006c\u0065\u00a2\u00a4\u0028.1027\u0058.1053\u0053\u0027\u0029'\u0029\">foo</div>",
+    "output": "<div style=''>foo</div>"
+  },
+
+  {
+    "name": "div_expression",
+    "input": "<div style=\"width: expression(alert('XSS'));\">foo</div>",
+    "output": "<div style=''>foo</div>"
+  },
+
+  {
+    "name": "double_open_angle_brackets",
+    "input": "<img src=http://ha.ckers.org/scriptlet.html <",
+    "output": "<img src='http://ha.ckers.org/scriptlet.html'>",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "double_open_angle_brackets_2",
+    "input": "<script src=http://ha.ckers.org/scriptlet.html <",
+    "output": "&lt;script src=\"http://ha.ckers.org/scriptlet.html\" &lt;=\"\"&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "grave_accents",
+    "input": "<img src=`javascript:alert('XSS')` />",
+    "output": "<img/>",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "img_dynsrc_lowsrc",
+    "input": "<img dynsrc=\"javascript:alert('XSS')\" />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "img_vbscript",
+    "input": "<img src='vbscript:msgbox(\"XSS\")' />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "input_image",
+    "input": "<input type=\"image\" src=\"javascript:alert('XSS');\" />",
+    "output": "<input type='image'/>",
+    "rexml": "<input type='image' />"
+  },
+
+  {
+    "name": "link_stylesheets",
+    "input": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\" />",
+    "output": "&lt;link rel=\"stylesheet\" href=\"javascript:alert('XSS');\"/&gt;",
+    "rexml": "&lt;link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"/&gt;"
+  },
+
+  {
+    "name": "link_stylesheets_2",
+    "input": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\" />",
+    "output": "&lt;link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\"/&gt;",
+    "rexml": "&lt;link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"/&gt;"
+  },
+
+  {
+    "name": "list_style_image",
+    "input": "<li style=\"list-style-image: url(javascript:alert('XSS'))\">foo</li>",
+    "output": "<li style=''>foo</li>"
+  },
+
+  {
+    "name": "no_closing_script_tags",
+    "input": "<script src=http://ha.ckers.org/xss.js?<b>",
+    "output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "non_alpha_non_digit",
+    "input": "<script/XSS src=\"http://ha.ckers.org/xss.js\"></script>",
+    "output": "&lt;script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "non_alpha_non_digit_2",
+    "input": "<a onclick!\\#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>",
+    "output": "<a>foo</a>",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "non_alpha_non_digit_3",
+    "input": "<img/src=\"http://ha.ckers.org/xss.js\"/>",
+    "output": "<img src='http://ha.ckers.org/xss.js'/>",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "non_alpha_non_digit_II",
+    "input": "<a href!\\#$%&()*~+-_.,:;?@[/|]^`=alert('XSS')>foo</a>",
+    "output": "<a>foo</a>",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "non_alpha_non_digit_III",
+    "input": "<a/href=\"javascript:alert('XSS');\">foo</a>",
+    "output": "<a>foo</a>",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "platypus",
+    "input": "<a href=\"http://www.ragingplatypus.com/\" style=\"display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;\">never trust your upstream platypus</a>",
+    "output": "<a href='http://www.ragingplatypus.com/' style='display: block; width: 100%; height: 100%; background-color: black; background-x: center; background-y: center;'>never trust your upstream platypus</a>"
+  },
+
+  {
+    "name": "protocol_resolution_in_script_tag",
+    "input": "<script src=//ha.ckers.org/.j></script>",
+    "output": "&lt;script src=\"//ha.ckers.org/.j\"&gt;&lt;/script&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "should_allow_anchors",
+    "input": "<a href='foo' onclick='bar'><script>baz</script></a>",
+    "output": "<a href='foo'>&lt;script&gt;baz&lt;/script&gt;</a>"
+  },
+
+  {
+    "name": "should_allow_image_alt_attribute",
+    "input": "<img alt='foo' onclick='bar' />",
+    "output": "<img alt='foo'/>",
+    "rexml": "<img alt='foo' />"
+  },
+
+  {
+    "name": "should_allow_image_height_attribute",
+    "input": "<img height='foo' onclick='bar' />",
+    "output": "<img height='foo'/>",
+    "rexml": "<img height='foo' />"
+  },
+
+  {
+    "name": "should_allow_image_src_attribute",
+    "input": "<img src='foo' onclick='bar' />",
+    "output": "<img src='foo'/>",
+    "rexml": "<img src='foo' />"
+  },
+
+  {
+    "name": "should_allow_image_width_attribute",
+    "input": "<img width='foo' onclick='bar' />",
+    "output": "<img width='foo'/>",
+    "rexml": "<img width='foo' />"
+  },
+
+  {
+    "name": "should_handle_blank_text",
+    "input": "",
+    "output": ""
+  },
+
+  {
+    "name": "should_handle_malformed_image_tags",
+    "input": "<img \"\"\"><script>alert(\"XSS\")</script>\">",
+    "output": "<img/>&lt;script&gt;alert(\"XSS\")&lt;/script&gt;\"&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "should_handle_non_html",
+    "input": "abc",
+    "output": "abc"
+  },
+
+  {
+    "name": "should_not_fall_for_ridiculous_hack",
+    "input": "<img\nsrc\n=\n\"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n\"\n />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_0",
+    "input": "<img src=\"javascript:alert('XSS');\" />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_1",
+    "input": "<img src=javascript:alert('XSS') />",
+    "output": "<img/>",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_10",
+    "input": "<img src=\"jav&#x0A;ascript:alert('XSS');\" />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_11",
+    "input": "<img src=\"jav&#x0D;ascript:alert('XSS');\" />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_12",
+    "input": "<img src=\" &#14;  javascript:alert('XSS');\" />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_13",
+    "input": "<img src=\"&#x20;javascript:alert('XSS');\" />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_14",
+    "input": "<img src=\"&#xA0;javascript:alert('XSS');\" />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_2",
+    "input": "<img src=\"JaVaScRiPt:alert('XSS')\" />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_3",
+    "input": "<img src='javascript:alert(&quot;XSS&quot;)' />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_4",
+    "input": "<img src='javascript:alert(String.fromCharCode(88,83,83))' />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_5",
+    "input": "<img src='&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;' />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_6",
+    "input": "<img src='&#0000106;&#0000097;&#0000118;&#0000097;&#0000115;&#0000099;&#0000114;&#0000105;&#0000112;&#0000116;&#0000058;&#0000097;&#0000108;&#0000101;&#0000114;&#0000116;&#0000040;&#0000039;&#0000088;&#0000083;&#0000083;&#0000039;&#0000041' />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_7",
+    "input": "<img src='&#x6A;&#x61;&#x76;&#x61;&#x73;&#x63;&#x72;&#x69;&#x70;&#x74;&#x3A;&#x61;&#x6C;&#x65;&#x72;&#x74;&#x28;&#x27;&#x58;&#x53;&#x53;&#x27;&#x29' />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_8",
+    "input": "<img src=\"jav\tascript:alert('XSS');\" />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_not_fall_for_xss_image_hack_9",
+    "input": "<img src=\"jav&#x09;ascript:alert('XSS');\" />",
+    "output": "<img/>",
+    "rexml": "<img />"
+  },
+
+  {
+    "name": "should_sanitize_half_open_scripts",
+    "input": "<img src=\"javascript:alert('XSS')\"",
+    "output": "<img/>",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "should_sanitize_invalid_script_tag",
+    "input": "<script/XSS SRC=\"http://ha.ckers.org/xss.js\"></script>",
+    "output": "&lt;script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "should_sanitize_script_tag_with_multiple_open_brackets",
+    "input": "<<script>alert(\"XSS\");//<</script>",
+    "output": "&lt;&lt;script&gt;alert(\"XSS\");//&lt;&lt;/script&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
+    "input": "<iframe src=http://ha.ckers.org/scriptlet.html\n<",
+    "output": "&lt;iframe src=\"http://ha.ckers.org/scriptlet.html\" &lt;=\"\"&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "should_sanitize_tag_broken_up_by_null",
+    "input": "<scr\u0000ipt>alert(\"XSS\")</scr\u0000ipt>",
+    "output": "&lt;scr\ufffdipt&gt;alert(\"XSS\")&lt;/scr\ufffdipt&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "should_sanitize_unclosed_script",
+    "input": "<script src=http://ha.ckers.org/xss.js?<b>",
+    "output": "&lt;script src=\"http://ha.ckers.org/xss.js?&amp;lt;b\"&gt;",
+    "rexml": "Ill-formed XHTML!"
+  },
+
+  {
+    "name": "should_strip_href_attribute_in_a_with_bad_protocols",
+    "input": "<a href=\"javascript:XSS\" title=\"1\">boo</a>",
+    "output": "<a title='1'>boo</a>"
+  },
+
+  {
+    "name": "should_strip_href_attribute_in_a_with_bad_protocols_and_whitespace",
+    "input": "<a href=\" javascript:XSS\" title=\"1\">boo</a>",
+    "output": "<a title='1'>boo</a>"
+  },
+
+  {
+    "name": "should_strip_src_attribute_in_img_with_bad_protocols",
+    "input": "<img src=\"javascript:XSS\" title=\"1\">boo</img>",
+    "output": "<img title='1'/>boo",
+    "rexml": "<img title='1' />"
+  },
+
+  {
+    "name": "should_strip_src_attribute_in_img_with_bad_protocols_and_whitespace",
+    "input": "<img src=\" javascript:XSS\" title=\"1\">boo</img>",
+    "output": "<img title='1'/>boo",
+    "rexml": "<img title='1' />"
+  },
+
+  {
+    "name": "xml_base",
+    "input": "<div xml:base=\"javascript:alert('XSS');//\">foo</div>",
+    "output": "<div>foo</div>"
+  },
+
+  {
+    "name": "xul",
+    "input": "<p style=\"-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss')\">fubar</p>",
+    "output": "<p style=''>fubar</p>"
+  },
+
+  {
+    "name": "quotes_in_attributes",
+    "input": "<img src='foo' title='\"foo\" bar' />",
+    "rexml": "<img src='foo' title='\"foo\" bar' />",
+    "output": "<img title='&quot;foo&quot; bar' src='foo'/>"
+  },
+
+  {
+    "name": "uri_refs_in_svg_attributes",
+    "input": "<rect fill='url(#foo)' />",
+    "rexml": "<rect fill='url(#foo)'></rect>",
+    "xhtml": "<rect fill='url(#foo)'></rect>",
+    "output": "<rect fill='url(#foo)'/>"
+  },
+
+  {
+    "name": "absolute_uri_refs_in_svg_attributes",
+    "input": "<rect fill='url(http://bad.com/) #fff' />",
+    "rexml": "<rect fill='  #fff'></rect>",
+    "xhtml": "<rect fill='  #fff'></rect>",
+    "output": "<rect fill='  #fff'/>"
+  },
+
+  {
+    "name": "uri_ref_with_space_in svg_attribute",
+    "input": "<rect fill='url(\n#foo)' />",
+    "rexml": "<rect fill='url(\n#foo)'></rect>",
+    "xhtml": "<rect fill='url(\n#foo)'></rect>",
+    "output": "<rect fill='url(\n#foo)'/>"
+  },
+
+  {
+    "name": "absolute_uri_ref_with_space_in svg_attribute",
+    "input": "<rect fill=\"url(\nhttp://bad.com/)\" />",
+    "rexml": "<rect fill=' '></rect>",
+    "xhtml": "<rect fill=' '></rect>",
+    "output": "<rect fill=' '/>"
+  },
+
+  {
+    "name": "allow_html5_image_tag",
+    "input": "<image src='foo' />",
+    "rexml": "&lt;image src=\"foo\"&gt;&lt;/image&gt;",
+    "output": "&lt;image src=\"foo\"/&gt;"
+  },
+
+  {
+    "name": "style_attr_end_with_nothing",
+    "input": "<div style=\"color: blue\" />",
+    "output": "<div style='color: blue;'/>",
+    "xhtml": "<div style='color: blue;'></div>",
+    "rexml": "<div style='color: blue;'></div>"
+  },
+
+  {
+    "name": "style_attr_end_with_space",
+    "input": "<div style=\"color: blue \" />",
+    "output": "<div style='color: blue ;'/>",
+    "xhtml": "<div style='color: blue ;'></div>",
+    "rexml": "<div style='color: blue ;'></div>"
+  },
+
+  {
+    "name": "style_attr_end_with_semicolon",
+    "input": "<div style=\"color: blue;\" />",
+    "output": "<div style='color: blue;'/>",
+    "xhtml": "<div style='color: blue;'></div>",
+    "rexml": "<div style='color: blue;'></div>"
+  },
+
+  {
+    "name": "style_attr_end_with_semicolon_space",
+    "input": "<div style=\"color: blue; \" />",
+    "output": "<div style='color: blue;'/>",
+    "xhtml": "<div style='color: blue;'></div>",
+    "rexml": "<div style='color: blue;'></div>"
+  },
+  
+  {
+   "name": "attributes_with_embedded_quotes",
+   "input": "<img src=doesntexist.jpg\"'onerror=\"alert(1) />",
+   "output": "<img src='doesntexist.jpg&quot;&apos;onerror=&quot;alert(1)'/>",
+   "rexml": "Ill-formed XHTML!"
+  },
+  
+  {
+   "name": "attributes_with_embedded_quotes_II",
+   "input": "<img src=notthere.jpg\"\"onerror=\"alert(2) />",
+   "output": "<img src='notthere.jpg&quot;&quot;onerror=&quot;alert(2)'/>",
+   "rexml": "Ill-formed XHTML!"
+  }
+]
diff --git a/html5lib/tests/testdata/serializer/core.test b/html5lib/tests/testdata/serializer/core.test
new file mode 100644
index 00000000..c0b4222d
--- /dev/null
+++ b/html5lib/tests/testdata/serializer/core.test
@@ -0,0 +1,125 @@
+{"tests": [
+
+{"description": "proper attribute value escaping",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test \"with\" &quot;"}]]],
+ "expected": ["<span title='test \"with\" &amp;quot;'>"]
+},
+
+{"description": "proper attribute value non-quoting",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo"}]]],
+ "expected": ["<span title=foo>"],
+ "xhtml":    ["<span title=\"foo\">"]
+},
+
+{"description": "proper attribute value non-quoting (with <)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo<bar"}]]],
+ "expected": ["<span title=foo<bar>"],
+ "xhtml":    ["<span title=\"foo&lt;bar\">"]
+},
+
+{"description": "proper attribute value quoting (with =)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo=bar"}]]],
+ "expected": ["<span title=\"foo=bar\">"]
+},
+
+{"description": "proper attribute value quoting (with >)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo>bar"}]]],
+ "expected": ["<span title=\"foo>bar\">"]
+},
+
+{"description": "proper attribute value quoting (with \")",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\"bar"}]]],
+ "expected": ["<span title='foo\"bar'>"]
+},
+
+{"description": "proper attribute value quoting (with ')",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar"}]]],
+ "expected": ["<span title=\"foo'bar\">"]
+},
+
+{"description": "proper attribute value quoting (with both \" and ')",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo'bar\"baz"}]]],
+ "expected": ["<span title=\"foo'bar&quot;baz\">"]
+},
+
+{"description": "proper attribute value quoting (with space)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo bar"}]]],
+ "expected": ["<span title=\"foo bar\">"]
+},
+
+{"description": "proper attribute value quoting (with tab)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\tbar"}]]],
+ "expected": ["<span title=\"foo\tbar\">"]
+},
+
+{"description": "proper attribute value quoting (with LF)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\nbar"}]]],
+ "expected": ["<span title=\"foo\nbar\">"]
+},
+
+{"description": "proper attribute value quoting (with CR)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\rbar"}]]],
+ "expected": ["<span title=\"foo\rbar\">"]
+},
+
+{"description": "proper attribute value non-quoting (with linetab)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Bbar"}]]],
+ "expected": ["<span title=foo\u000Bbar>"],
+ "xhtml": ["<span title=\"foo\u000Bbar\">"]
+},
+
+{"description": "proper attribute value quoting (with form feed)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "foo\u000Cbar"}]]],
+ "expected": ["<span title=\"foo\u000Cbar\">"]
+},
+
+{"description": "void element (as EmptyTag token)",
+ "input": [["EmptyTag", "img", {}]],
+ "expected": ["<img>"],
+ "xhtml":    ["<img />"]
+},
+
+{"description": "void element (as StartTag token)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "img", {}]],
+ "expected": ["<img>"],
+ "xhtml":    ["<img />"]
+},
+
+{"description": "doctype in error",
+ "input": [["Doctype", "foo"]],
+ "expected": ["<!DOCTYPE foo>"]
+},
+
+{"description": "character data",
+ "options": {"encoding":"utf-8"},
+ "input": [["Characters", "a<b>c&d"]],
+ "expected": ["a&lt;b&gt;c&amp;d"]
+},
+
+{"description": "rcdata",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
+ "expected": ["<script>a<b>c&d"],
+ "xhtml": ["<script>a&lt;b&gt;c&amp;d"]
+},
+
+{"description": "doctype",
+ "input": [["Doctype", "HTML"]],
+ "expected": ["<!DOCTYPE HTML>"]
+},
+
+{"description": "HTML 4.01 DOCTYPE",
+ "input": [["Doctype", "HTML",  "-//W3C//DTD HTML 4.01//EN", "http://www.w3.org/TR/html4/strict.dtd"]],
+ "expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"]
+},
+
+{"description": "HTML 4.01 DOCTYPE without system identifer",
+ "input": [["Doctype", "HTML",  "-//W3C//DTD HTML 4.01//EN"]],
+ "expected": ["<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">"]
+},
+
+{"description": "IBM DOCTYPE without public identifer",
+ "input": [["Doctype", "html",  "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]],
+ "expected": ["<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">"]
+}
+
+]}
diff --git a/html5lib/tests/testdata/serializer/injectmeta.test b/html5lib/tests/testdata/serializer/injectmeta.test
new file mode 100644
index 00000000..feaaa44f
--- /dev/null
+++ b/html5lib/tests/testdata/serializer/injectmeta.test
@@ -0,0 +1,66 @@
+{"tests": [
+
+{"description": "no encoding",
+ "options": {"inject_meta_charset": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": [""],
+ "xhtml": ["<head></head>"]
+},
+
+{"description": "empytag head",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta charset=utf-8>"],
+ "xhtml":    ["<head><meta charset=\"utf-8\" /></head>"]
+},
+
+{"description": "head w/title",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml","title",{}], ["Characters", "foo"],["EndTag", "http://www.w3.org/1999/xhtml", "title"], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta charset=utf-8><title>foo</title>"],
+ "xhtml":    ["<head><meta charset=\"utf-8\" /><title>foo</title></head>"]
+},
+
+{"description": "head w/meta-charset",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta charset=utf-8>"],
+ "xhtml":    ["<head><meta charset=\"utf-8\" /></head>"]
+},
+
+{"description": "head w/ two meta-charset",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta charset=utf-8><meta charset=utf-8>", "<head><meta charset=utf-8><meta charset=ascii>"],
+ "xhtml": ["<head><meta charset=\"utf-8\" /><meta charset=\"utf-8\" /></head>", "<head><meta charset=\"utf-8\" /><meta charset=\"ascii\" /></head>"]
+},
+
+{"description": "head w/robots",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta charset=utf-8><meta content=noindex name=robots>"],
+ "xhtml":    ["<head><meta charset=\"utf-8\" /><meta content=\"noindex\" name=\"robots\" /></head>"]
+},
+
+{"description": "head w/robots & charset",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "charset", "value": "ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta content=noindex name=robots><meta charset=utf-8>"],
+ "xhtml":    ["<head><meta content=\"noindex\" name=\"robots\" /><meta charset=\"utf-8\" /></head>"]
+},
+
+{"description": "head w/ charset in http-equiv content-type",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
+ "xhtml":    ["<head><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
+},
+
+{"description": "head w/robots & charset in http-equiv content-type",
+ "options": {"inject_meta_charset": true, "encoding":"utf-8"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag","meta",[{"namespace": null, "name": "name", "value": "robots"},{"namespace": null, "name": "content", "value": "noindex"}]], ["EmptyTag","meta",[{"namespace": null, "name": "http-equiv", "value": "content-type"}, {"namespace": null, "name": "content", "value": "text/html; charset=ascii"}]], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": ["<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>"],
+ "xhtml": ["<head><meta content=\"noindex\" name=\"robots\" /><meta content=\"text/html; charset=utf-8\" http-equiv=\"content-type\" /></head>"]
+}
+
+]}
diff --git a/html5lib/tests/testdata/serializer/optionaltags.test b/html5lib/tests/testdata/serializer/optionaltags.test
new file mode 100644
index 00000000..80a5edf8
--- /dev/null
+++ b/html5lib/tests/testdata/serializer/optionaltags.test
@@ -0,0 +1,965 @@
+{"tests": [
+
+{"description": "html start-tag followed by text, with attributes",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", [{"namespace": null, "name": "lang", "value": "en"}]], ["Characters", "foo"]],
+ "expected": ["<html lang=en>foo"]
+},
+
+
+
+{"description": "html start-tag followed by comment",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Comment", "foo"]],
+ "expected": ["<html><!--foo-->"]
+},
+
+{"description": "html start-tag followed by space character",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", " foo"]],
+ "expected": ["<html> foo"]
+},
+
+{"description": "html start-tag followed by text",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "html start-tag followed by start-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "html start-tag followed by end-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "html start-tag at EOF (shouldn't ever happen?!)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "html", {}]],
+ "expected": [""]
+},
+
+
+
+{"description": "html end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Comment", "foo"]],
+ "expected": ["</html><!--foo-->"]
+},
+
+{"description": "html end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", " foo"]],
+ "expected": ["</html> foo"]
+},
+
+{"description": "html end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "html end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "html end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "html end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "html"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "head start-tag followed by comment",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Comment", "foo"]],
+ "expected": ["<head><!--foo-->"]
+},
+
+{"description": "head start-tag followed by space character",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", " foo"]],
+ "expected": ["<head> foo"]
+},
+
+{"description": "head start-tag followed by text",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["Characters", "foo"]],
+ "expected": ["<head>foo"]
+},
+
+{"description": "head start-tag followed by start-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "head start-tag followed by end-tag (shouldn't ever happen?!)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["<head></foo>", "</foo>"]
+},
+
+{"description": "empty head element",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": [""]
+},
+
+{"description": "head start-tag followed by empty-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}], ["EmptyTag", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "head start-tag at EOF (shouldn't ever happen?!)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "head", {}]],
+ "expected": ["<head>", ""]
+},
+
+
+
+{"description": "head end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Comment", "foo"]],
+ "expected": ["</head><!--foo-->"]
+},
+
+{"description": "head end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", " foo"]],
+ "expected": ["</head> foo"]
+},
+
+{"description": "head end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "head end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "head end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "head end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "head"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "body start-tag followed by comment",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Comment", "foo"]],
+ "expected": ["<body><!--foo-->"]
+},
+
+{"description": "body start-tag followed by space character",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", " foo"]],
+ "expected": ["<body> foo"]
+},
+
+{"description": "body start-tag followed by text",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "body start-tag followed by start-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "body start-tag followed by end-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "body start-tag at EOF (shouldn't ever happen?!)",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "body", {}]],
+ "expected": [""]
+},
+
+
+
+{"description": "body end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Comment", "foo"]],
+ "expected": ["</body><!--foo-->"]
+},
+
+{"description": "body end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", " foo"]],
+ "expected": ["</body> foo"]
+},
+
+{"description": "body end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "body end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "body end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "body end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "body"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "li end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Comment", "foo"]],
+ "expected": ["</li><!--foo-->"]
+},
+
+{"description": "li end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", " foo"]],
+ "expected": ["</li> foo"]
+},
+
+{"description": "li end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["Characters", "foo"]],
+ "expected": ["</li>foo"]
+},
+
+{"description": "li end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</li><foo>"]
+},
+
+{"description": "li end-tag followed by li start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["StartTag", "http://www.w3.org/1999/xhtml", "li", {}]],
+ "expected": ["<li>"]
+},
+
+{"description": "li end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "li end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "li"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "dt end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Comment", "foo"]],
+ "expected": ["</dt><!--foo-->"]
+},
+
+{"description": "dt end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", " foo"]],
+ "expected": ["</dt> foo"]
+},
+
+{"description": "dt end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["Characters", "foo"]],
+ "expected": ["</dt>foo"]
+},
+
+{"description": "dt end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</dt><foo>"]
+},
+
+{"description": "dt end-tag followed by dt start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
+ "expected": ["<dt>"]
+},
+
+{"description": "dt end-tag followed by dd start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
+ "expected": ["<dd>"]
+},
+
+{"description": "dt end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</dt></foo>"]
+},
+
+{"description": "dt end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dt"]],
+ "expected": ["</dt>"]
+},
+
+
+
+
+{"description": "dd end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Comment", "foo"]],
+ "expected": ["</dd><!--foo-->"]
+},
+
+{"description": "dd end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", " foo"]],
+ "expected": ["</dd> foo"]
+},
+
+{"description": "dd end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["Characters", "foo"]],
+ "expected": ["</dd>foo"]
+},
+
+{"description": "dd end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</dd><foo>"]
+},
+
+{"description": "dd end-tag followed by dd start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dd", {}]],
+ "expected": ["<dd>"]
+},
+
+{"description": "dd end-tag followed by dt start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["StartTag", "http://www.w3.org/1999/xhtml", "dt", {}]],
+ "expected": ["<dt>"]
+},
+
+{"description": "dd end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "dd end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "dd"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "p end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Comment", "foo"]],
+ "expected": ["</p><!--foo-->"]
+},
+
+{"description": "p end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", " foo"]],
+ "expected": ["</p> foo"]
+},
+
+{"description": "p end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["Characters", "foo"]],
+ "expected": ["</p>foo"]
+},
+
+{"description": "p end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</p><foo>"]
+},
+
+{"description": "p end-tag followed by address start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "address", {}]],
+ "expected": ["<address>"]
+},
+
+{"description": "p end-tag followed by article start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "article", {}]],
+ "expected": ["<article>"]
+},
+
+{"description": "p end-tag followed by aside start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "aside", {}]],
+ "expected": ["<aside>"]
+},
+
+{"description": "p end-tag followed by blockquote start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "blockquote", {}]],
+ "expected": ["<blockquote>"]
+},
+
+{"description": "p end-tag followed by datagrid start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "datagrid", {}]],
+ "expected": ["<datagrid>"]
+},
+
+{"description": "p end-tag followed by dialog start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dialog", {}]],
+ "expected": ["<dialog>"]
+},
+
+{"description": "p end-tag followed by dir start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dir", {}]],
+ "expected": ["<dir>"]
+},
+
+{"description": "p end-tag followed by div start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
+ "expected": ["<div>"]
+},
+
+{"description": "p end-tag followed by dl start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "dl", {}]],
+ "expected": ["<dl>"]
+},
+
+{"description": "p end-tag followed by fieldset start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "fieldset", {}]],
+ "expected": ["<fieldset>"]
+},
+
+{"description": "p end-tag followed by footer start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "footer", {}]],
+ "expected": ["<footer>"]
+},
+
+{"description": "p end-tag followed by form start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "form", {}]],
+ "expected": ["<form>"]
+},
+
+{"description": "p end-tag followed by h1 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h1", {}]],
+ "expected": ["<h1>"]
+},
+
+{"description": "p end-tag followed by h2 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h2", {}]],
+ "expected": ["<h2>"]
+},
+
+{"description": "p end-tag followed by h3 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h3", {}]],
+ "expected": ["<h3>"]
+},
+
+{"description": "p end-tag followed by h4 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h4", {}]],
+ "expected": ["<h4>"]
+},
+
+{"description": "p end-tag followed by h5 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h5", {}]],
+ "expected": ["<h5>"]
+},
+
+{"description": "p end-tag followed by h6 start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "h6", {}]],
+ "expected": ["<h6>"]
+},
+
+{"description": "p end-tag followed by header start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "header", {}]],
+ "expected": ["<header>"]
+},
+
+{"description": "p end-tag followed by hr empty-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EmptyTag", "hr", {}]],
+ "expected": ["<hr>"]
+},
+
+{"description": "p end-tag followed by menu start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "menu", {}]],
+ "expected": ["<menu>"]
+},
+
+{"description": "p end-tag followed by nav start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "nav", {}]],
+ "expected": ["<nav>"]
+},
+
+{"description": "p end-tag followed by ol start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ol", {}]],
+ "expected": ["<ol>"]
+},
+
+{"description": "p end-tag followed by p start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "p", {}]],
+ "expected": ["<p>"]
+},
+
+{"description": "p end-tag followed by pre start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}]],
+ "expected": ["<pre>"]
+},
+
+{"description": "p end-tag followed by section start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "section", {}]],
+ "expected": ["<section>"]
+},
+
+{"description": "p end-tag followed by table start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "table", {}]],
+ "expected": ["<table>"]
+},
+
+{"description": "p end-tag followed by ul start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["StartTag", "http://www.w3.org/1999/xhtml", "ul", {}]],
+ "expected": ["<ul>"]
+},
+
+{"description": "p end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "p end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "p"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "optgroup end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Comment", "foo"]],
+ "expected": ["</optgroup><!--foo-->"]
+},
+
+{"description": "optgroup end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", " foo"]],
+ "expected": ["</optgroup> foo"]
+},
+
+{"description": "optgroup end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["Characters", "foo"]],
+ "expected": ["</optgroup>foo"]
+},
+
+{"description": "optgroup end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</optgroup><foo>"]
+},
+
+{"description": "optgroup end-tag followed by optgroup start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
+ "expected": ["<optgroup>"]
+},
+
+{"description": "optgroup end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "optgroup end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "optgroup"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "option end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Comment", "foo"]],
+ "expected": ["</option><!--foo-->"]
+},
+
+{"description": "option end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", " foo"]],
+ "expected": ["</option> foo"]
+},
+
+{"description": "option end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["Characters", "foo"]],
+ "expected": ["</option>foo"]
+},
+
+{"description": "option end-tag followed by optgroup start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "optgroup", {}]],
+ "expected": ["<optgroup>"]
+},
+
+{"description": "option end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</option><foo>"]
+},
+
+{"description": "option end-tag followed by option start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["StartTag", "http://www.w3.org/1999/xhtml", "option", {}]],
+ "expected": ["<option>"]
+},
+
+{"description": "option end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "option end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "option"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "colgroup start-tag followed by comment",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Comment", "foo"]],
+ "expected": ["<colgroup><!--foo-->"]
+},
+
+{"description": "colgroup start-tag followed by space character",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", " foo"]],
+ "expected": ["<colgroup> foo"]
+},
+
+{"description": "colgroup start-tag followed by text",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["Characters", "foo"]],
+ "expected": ["<colgroup>foo"]
+},
+
+{"description": "colgroup start-tag followed by start-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<colgroup><foo>"]
+},
+
+{"description": "first colgroup in a table with a col child",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EmptyTag", "col", {}]],
+ "expected": ["<table><col>"]
+},
+
+{"description": "colgroup with a col child, following another colgroup",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "col", {}]],
+ "expected": ["</colgroup><col>", "<colgroup><col>"]
+},
+
+{"description": "colgroup start-tag followed by end-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["<colgroup></foo>"]
+},
+
+{"description": "colgroup start-tag at EOF",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "colgroup", {}]],
+ "expected": ["<colgroup>"]
+},
+
+
+
+{"description": "colgroup end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Comment", "foo"]],
+ "expected": ["</colgroup><!--foo-->"]
+},
+
+{"description": "colgroup end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", " foo"]],
+ "expected": ["</colgroup> foo"]
+},
+
+{"description": "colgroup end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["Characters", "foo"]],
+ "expected": ["foo"]
+},
+
+{"description": "colgroup end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<foo>"]
+},
+
+{"description": "colgroup end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "colgroup end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "colgroup"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "thead end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Comment", "foo"]],
+ "expected": ["</thead><!--foo-->"]
+},
+
+{"description": "thead end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", " foo"]],
+ "expected": ["</thead> foo"]
+},
+
+{"description": "thead end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["Characters", "foo"]],
+ "expected": ["</thead>foo"]
+},
+
+{"description": "thead end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</thead><foo>"]
+},
+
+{"description": "thead end-tag followed by tbody start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
+ "expected": ["<tbody>"]
+},
+
+{"description": "thead end-tag followed by tfoot start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
+ "expected": ["<tfoot>"]
+},
+
+{"description": "thead end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</thead></foo>"]
+},
+
+{"description": "thead end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"]],
+ "expected": ["</thead>"]
+},
+
+
+
+
+{"description": "tbody start-tag followed by comment",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Comment", "foo"]],
+ "expected": ["<tbody><!--foo-->"]
+},
+
+{"description": "tbody start-tag followed by space character",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", " foo"]],
+ "expected": ["<tbody> foo"]
+},
+
+{"description": "tbody start-tag followed by text",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["Characters", "foo"]],
+ "expected": ["<tbody>foo"]
+},
+
+{"description": "tbody start-tag followed by start-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["<tbody><foo>"]
+},
+
+{"description": "first tbody in a table with a tr child",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "table", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
+ "expected": ["<table><tr>"]
+},
+
+{"description": "tbody with a tr child, following another tbody",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
+ "expected": ["<tbody><tr>", "</tbody><tr>"]
+},
+
+{"description": "tbody with a tr child, following a thead",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "thead"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
+ "expected": ["<tbody><tr>", "</thead><tr>"]
+},
+
+{"description": "tbody with a tr child, following a tfoot",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
+ "expected": ["<tbody><tr>", "</tfoot><tr>"]
+},
+
+{"description": "tbody start-tag followed by end-tag",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["<tbody></foo>"]
+},
+
+{"description": "tbody start-tag at EOF",
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
+ "expected": ["<tbody>"]
+},
+
+
+
+{"description": "tbody end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Comment", "foo"]],
+ "expected": ["</tbody><!--foo-->"]
+},
+
+{"description": "tbody end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", " foo"]],
+ "expected": ["</tbody> foo"]
+},
+
+{"description": "tbody end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["Characters", "foo"]],
+ "expected": ["</tbody>foo"]
+},
+
+{"description": "tbody end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</tbody><foo>"]
+},
+
+{"description": "tbody end-tag followed by tbody start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
+ "expected": ["<tbody>", "</tbody>"]
+},
+
+{"description": "tbody end-tag followed by tfoot start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["StartTag", "http://www.w3.org/1999/xhtml", "tfoot", {}]],
+ "expected": ["<tfoot>"]
+},
+
+{"description": "tbody end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "tbody end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tbody"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "tfoot end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Comment", "foo"]],
+ "expected": ["</tfoot><!--foo-->"]
+},
+
+{"description": "tfoot end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", " foo"]],
+ "expected": ["</tfoot> foo"]
+},
+
+{"description": "tfoot end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["Characters", "foo"]],
+ "expected": ["</tfoot>foo"]
+},
+
+{"description": "tfoot end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</tfoot><foo>"]
+},
+
+{"description": "tfoot end-tag followed by tbody start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["StartTag", "http://www.w3.org/1999/xhtml", "tbody", {}]],
+ "expected": ["<tbody>", "</tfoot>"]
+},
+
+{"description": "tfoot end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "tfoot end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tfoot"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "tr end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Comment", "foo"]],
+ "expected": ["</tr><!--foo-->"]
+},
+
+{"description": "tr end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", " foo"]],
+ "expected": ["</tr> foo"]
+},
+
+{"description": "tr end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["Characters", "foo"]],
+ "expected": ["</tr>foo"]
+},
+
+{"description": "tr end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</tr><foo>"]
+},
+
+{"description": "tr end-tag followed by tr start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["StartTag", "http://www.w3.org/1999/xhtml", "tr", {}]],
+ "expected": ["<tr>", "</tr>"]
+},
+
+{"description": "tr end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "tr end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "tr"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "td end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Comment", "foo"]],
+ "expected": ["</td><!--foo-->"]
+},
+
+{"description": "td end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", " foo"]],
+ "expected": ["</td> foo"]
+},
+
+{"description": "td end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["Characters", "foo"]],
+ "expected": ["</td>foo"]
+},
+
+{"description": "td end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</td><foo>"]
+},
+
+{"description": "td end-tag followed by td start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
+ "expected": ["<td>", "</td>"]
+},
+
+{"description": "td end-tag followed by th start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
+ "expected": ["<th>", "</td>"]
+},
+
+{"description": "td end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "td end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "td"]],
+ "expected": [""]
+},
+
+
+
+
+{"description": "th end-tag followed by comment",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Comment", "foo"]],
+ "expected": ["</th><!--foo-->"]
+},
+
+{"description": "th end-tag followed by space character",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", " foo"]],
+ "expected": ["</th> foo"]
+},
+
+{"description": "th end-tag followed by text",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["Characters", "foo"]],
+ "expected": ["</th>foo"]
+},
+
+{"description": "th end-tag followed by start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "foo", {}]],
+ "expected": ["</th><foo>"]
+},
+
+{"description": "th end-tag followed by th start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "th", {}]],
+ "expected": ["<th>", "</th>"]
+},
+
+{"description": "th end-tag followed by td start-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["StartTag", "http://www.w3.org/1999/xhtml", "td", {}]],
+ "expected": ["<td>", "</th>"]
+},
+
+{"description": "th end-tag followed by end-tag",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml", "th"], ["EndTag", "http://www.w3.org/1999/xhtml", "foo"]],
+ "expected": ["</foo>"]
+},
+
+{"description": "th end-tag at EOF",
+ "input": [["EndTag", "http://www.w3.org/1999/xhtml"    , "th"]],
+ "expected": [""]
+}
+
+]}
diff --git a/html5lib/tests/testdata/serializer/options.test b/html5lib/tests/testdata/serializer/options.test
new file mode 100644
index 00000000..6f342dd3
--- /dev/null
+++ b/html5lib/tests/testdata/serializer/options.test
@@ -0,0 +1,60 @@
+{"tests":[
+
+{"description": "quote_char=\"'\"",
+ "options": {"quote_char": "'"},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": null, "name": "title", "value": "test 'with' quote_char"}]]],
+ "expected": ["<span title='test &#39;with&#39; quote_char'>"]
+},
+
+{"description": "quote_attr_values=true",
+ "options": {"quote_attr_values": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "button", [{"namespace": null, "name": "disabled", "value" :"disabled"}]]],
+ "expected": ["<button disabled>"],
+ "xhtml":    ["<button disabled=\"disabled\">"]
+},
+
+{"description": "quote_attr_values=true with irrelevant",
+ "options": {"quote_attr_values": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
+ "expected": ["<div irrelevant>"],
+ "xhtml":    ["<div irrelevant=\"irrelevant\">"]
+},
+
+{"description": "use_trailing_solidus=true with void element",
+ "options": {"use_trailing_solidus": true},
+ "input": [["EmptyTag", "img", {}]],
+ "expected": ["<img />"]
+},
+
+{"description": "use_trailing_solidus=true with non-void element",
+ "options": {"use_trailing_solidus": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", {}]],
+ "expected": ["<div>"]
+},
+
+{"description": "minimize_boolean_attributes=false",
+ "options": {"minimize_boolean_attributes": false},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :"irrelevant"}]]],
+ "expected": ["<div irrelevant=irrelevant>"],
+ "xhtml":    ["<div irrelevant=\"irrelevant\">"]
+},
+
+{"description": "minimize_boolean_attributes=false with empty value",
+ "options": {"minimize_boolean_attributes": false},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "div", [{"namespace": null, "name": "irrelevant", "value" :""}]]],
+ "expected": ["<div irrelevant=\"\">"]
+},
+
+{"description": "escape less than signs in attribute values",
+ "options": {"escape_lt_in_attrs": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "a", [{"namespace": null, "name": "title", "value": "a<b>c&d"}]]],
+ "expected": ["<a title=\"a&lt;b>c&amp;d\">"]
+},
+
+{"description": "rcdata",
+ "options": {"escape_rcdata": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "a<b>c&d"]],
+ "expected": ["<script>a&lt;b&gt;c&amp;d"]
+}
+
+]}
diff --git a/html5lib/tests/testdata/serializer/whitespace.test b/html5lib/tests/testdata/serializer/whitespace.test
new file mode 100644
index 00000000..e5d050d3
--- /dev/null
+++ b/html5lib/tests/testdata/serializer/whitespace.test
@@ -0,0 +1,51 @@
+{"tests": [
+
+{"description": "bare text with leading spaces",
+ "options": {"strip_whitespace": true},
+ "input": [["Characters", "\t\r\n\u000C foo"]],
+ "expected": [" foo"]
+},
+
+{"description": "bare text with trailing spaces",
+ "options": {"strip_whitespace": true},
+ "input": [["Characters", "foo \t\r\n\u000C"]],
+ "expected": ["foo "]
+},
+
+{"description": "bare text with inner spaces",
+ "options": {"strip_whitespace": true},
+ "input": [["Characters", "foo \t\r\n\u000C bar"]],
+ "expected": ["foo bar"]
+},
+
+{"description": "text within <pre>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
+ "expected": ["<pre>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</pre>"]
+},
+
+{"description": "text within <pre>, with inner markup",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "pre", {}], ["Characters", "\t\r\n\u000C fo"], ["StartTag", "http://www.w3.org/1999/xhtml", "span", {}], ["Characters", "o \t\r\n\u000C b"], ["EndTag", "http://www.w3.org/1999/xhtml", "span"], ["Characters", "ar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "pre"]],
+ "expected": ["<pre>\t\r\n\u000C fo<span>o \t\r\n\u000C b</span>ar \t\r\n\u000C</pre>"]
+},
+
+{"description": "text within <textarea>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "textarea", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "textarea"]],
+ "expected": ["<textarea>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</textarea>"]
+},
+
+{"description": "text within <script>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "script", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "script"]],
+ "expected": ["<script>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</script>"]
+},
+
+{"description": "text within <style>",
+ "options": {"strip_whitespace": true},
+ "input": [["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C"], ["EndTag", "http://www.w3.org/1999/xhtml", "style"]],
+ "expected": ["<style>\t\r\n\u000C foo \t\r\n\u000C bar \t\r\n\u000C</style>"]
+}
+
+]}
\ No newline at end of file
diff --git a/html5lib/tests/testdata/sniffer/htmlOrFeed.json b/html5lib/tests/testdata/sniffer/htmlOrFeed.json
new file mode 100644
index 00000000..c1506dbc
--- /dev/null
+++ b/html5lib/tests/testdata/sniffer/htmlOrFeed.json
@@ -0,0 +1,43 @@
+[
+    {"type": "text/html", "input": ""},
+    {"type": "text/html", "input": "<!---->"},
+    {"type": "text/html", "input": "<!--asdfaslkjdf;laksjdf as;dkfjsd-->"},
+    {"type": "text/html", "input": "<!"},
+    {"type": "text/html", "input": "\t"},
+    {"type": "text/html", "input": "<!>"},
+    {"type": "text/html", "input": "<?"},
+    {"type": "text/html", "input": "<??>"},
+    {"type": "application/rss+xml", "input": "<rss"},
+    {"type": "application/atom+xml", "input": "<feed"},
+    {"type": "text/html", "input": "<html"},
+    {"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n<html><head>\n<title>302 Found</title>\n</head><body>\n<h1>Found</h1>\n<p>The document has moved <a href=\"http://feeds.feedburner.com/gofug\">here</a>.</p>\n</body></html>\n"},
+    {"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">\r\n<HTML><HEAD>\r\n   <link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/289619328/feed.css\" /><link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/431602649/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/382549546/feed.css\" />\r\n<link rel=\"stylesheet\" type=\"text/css\" href=\"http://cache.blogads.com/314618017/feed.css\" /><META http-equiv=\"expires\" content="},
+    {"type": "text/html", "input": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\" \"http://www.w3.org/TR/html4/loose.dtd\">\r\n<html>\r\n<head>\r\n<title>Xiaxue - Chicken pie blogger.</title><meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"><style type=\"text/css\">\r\n<style type=\"text/css\">\r\n<!--\r\nbody {\r\n background-color: #FFF2F2;\r\n}\r\n.style1 {font-family: Georgia, \"Times New Roman\", Times, serif}\r\n.style2 {\r\n color: #8a567c;\r\n font-size: 14px;\r\n font-family: Georgia, \"Times New Roman\", Times, serif;\r\n}\r"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head> \r\n<title>Google Operating System</title>\r\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"Description\" content=\"Unofficial news and tips about Google. A blog that watches Google's latest developments and the attempts to move your operating system online.\" />\r\n<meta name=\"generator\" c"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n  <title>Assimilated Press</title>  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Assimilated Press - Atom\" href=\"http://assimila"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<head>\r\n  <title>PostSecret</title>\r\n<META name=\"keywords\" Content=\"secrets, postcard, secret, postcards, postsecret, postsecrets,online confessional, post secret, post secrets, artomatic, post a secret\"><META name=\"discription\" Content=\"See a Secret...Share a Secret\">  <meta http-equiv=\"Content-Type\" content=\"te"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns='http://www.w3.org/1999/xhtml' xmlns:b='http://www.google.com/2005/gml/b' xmlns:data='http://www.google.com/2005/gml/data' xmlns:expr='http://www.google.com/2005/gml/expr'>\n  <head>\n    \n  <meta content='text/html; charset=UTF-8' http-equiv='Content-Type'/>\n  <meta content='true' name='MSSmartTagsPreventParsing'/>\n  <meta content='blogger' name='generator'/>\n  <link rel=\"alternate\" typ"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\">\n<head profile=\"http://gmpg.org/xfn/11\"> \n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />  \n<title> CMS Lever</title><link rel=\"stylesheet\" type=\"text/css\" media=\"screen\" href=\"http://s.wordpress.com/wp-content/themes/pub/twenty-eight/2813.css\"/>\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" h"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> Park Avenue Peerage</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://parkavenuepeerage.wordpress.com/feed/\" />\t<link rel=\"pingback\" href="},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"ja\"><head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n<title> \u884c\u96f2\u6d41\u6c34 -like a floating clouds and running water-</title>\t<meta name=\"generator\" content=\"WordPress.com\" />\t<!-- feeds -->\n\t<link rel=\"alternate\" type=\"application/rss+xml\" title=\"RSS 2.0\" href=\"http://shw4.wordpress.com/feed/\" />\t<li"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Go Fug Yourself</title><link rel=\"stylesheet\" href=\"http://gofugyourself.typepad.com/go_fug_yourself/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Atom\" "},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\"><head profile=\"http://gmpg.org/xfn/11\">\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /><title> Ladies&#8230;</title><meta name=\"generator\" content=\"WordPress.com\" /> <!-- leave this for stats --><link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/default/style.css?1\" type=\"tex"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\r\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\r\n<head>\r\n  <title>The Sartorialist</title>  <meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"The Sartorialist - Atom\" href=\"http://thesartorialist.blogspot"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \n     \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html  xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\" />\n<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n<title>Creating Passionate Users</title><link rel=\"stylesheet\" href=\"http://headrush.typepad.com/creating_passionate_users/styles.css\" type=\"text/css\" />\n<link rel=\"alternate\" type"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n    <meta name=\"keywords\" content=\"marketing, blog, seth, ideas, respect, permission\" />\n    <meta name=\"description\" content=\"Seth Godin's riffs on marketing, respect, and the "},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n\t\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" id=\"sixapart-standard\">\n<head>\n\t<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />\n\t<meta name=\"generator\" content=\"http://www.typepad.com/\" />\n\t\n\t\n    \n    <meta name=\"description\" content=\" Western Civilization hangs in the balance. This blog is part of the solution,the cure. Get your heads out of the sand and Fight the G"},
+    {"type": "text/html", "input": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\" \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\" dir=\"ltr\" lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\" />\n<title> From Under the Rotunda</title>\n<link rel=\"stylesheet\" href=\"http://s.wordpress.com/wp-content/themes/pub/andreas04/style.css\" type=\"text/css\""},
+    {"type": "application/atom+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><?xml-stylesheet href=\"http://www.blogger.com/styles/atom.css\" type=\"text/css\"?><feed xmlns='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/'><id>tag:blogger.com,1999:blog-10861780</id><updated>2007-07-27T12:38:50.888-07:00</updated><title type='text'>Official Google Blog</title><link rel='alternate' type='text/html' href='http://googleblog.blogspot.com/'/><link rel='next' type='application/atom+xml' href='http://googleblog.blogs"},
+    {"type": "application/rss+xml", "input": "<?xml version='1.0' encoding='UTF-8'?><rss xmlns:atom='http://www.w3.org/2005/Atom' xmlns:openSearch='http://a9.com/-/spec/opensearchrss/1.0/' version='2.0'><channel><atom:id>tag:blogger.com,1999:blog-10861780</atom:id><lastBuildDate>Fri, 27 Jul 2007 19:38:50 +0000</lastBuildDate><title>Official Google Blog</title><description/><link>http://googleblog.blogspot.com/</link><managingEditor>Eric Case</managingEditor><generator>Blogger</generator><openSearch:totalResults>729</openSearch:totalResults><openSearc"},
+    {"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"pahrefhttpwwwfeedburnercomtarget_blankimgsrchttpwwwfeedburnercomfbimagespubpowered_by_fbgifaltPoweredbyFeedBurnerstyleborder0ap\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>From Under the Rotunda</title>\n\t<link>http://dannybernardi.wordpress.com</link>\n\t<description>The Monographs of Danny Ber"},
+    {"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>CMS Lever</title>\n\t<link>http://kanaguri.wordpress.com</link>\n\t<description>CMS\u306e\u6c17\u306b\u306a\u3063\u305f\u3053\u3068</description>\n\t<pubDate>Wed, 18 Jul 2007 21:26:22 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>ja</languag"},
+    {"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\">\n    <title>Atlas Shrugs</title>\n    <link rel=\"self\" type=\"application/atom+xml\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/atom.xml\" />\n    <link rel=\"alternate\" type=\"text/html\" href=\"http://atlasshrugs2000.typepad.com/atlas_shrugs/\" />\n    <id>tag:typepad.com,2003:weblog-132946</id>\n    <updated>2007-08-15T16:07:34-04"},
+    {"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:thr=\"http://purl.org/syndication/thread/1.0\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n    <title>Creating Passionate Users</title>\r\n  "},
+    {"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\">\r\n    <title>Seth's Blog</title>\r\n    <link rel=\"alternate\" type=\"text/html\" href=\"http://sethgodin.typepad.com/seths_blog/\" />\r\n    <link rel=\"s"},
+    {"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atom10full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://www.w3.org/2005/Atom\" xmlns:openSearch=\"http://a9.com/-/spec/opensearchrss/1.0/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\"><id>tag:blogger.com,1999:blog-32454861</id><updated>2007-07-31T21:44:09.867+02:00</upd"},
+    {"type": "application/atom+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/atomfull.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><feed xmlns=\"http://purl.org/atom/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"0.3\">\r\n  <title>Go Fug Yourself</title>\r\n  <link rel=\"alternate\" type=\"text/html\" href=\"http://go"},
+    {"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n<?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/rss2full.xsl\" type=\"text/xsl\" media=\"screen\"?><?xml-stylesheet href=\"http://feeds.feedburner.com/~d/styles/itemcontent.css\" type=\"text/css\" media=\"screen\"?><rss xmlns:creativeCommons=\"http://backend.userland.com/creativeCommonsRssModule\" xmlns:feedburner=\"http://rssnamespace.org/feedburner/ext/1.0\" version=\"2.0\"><channel><title>Google Operating System</title><link>http://googlesystem.blogspot.com/</link>"},
+    {"type": "application/rss+xml", "input": "<?xml version=\"1.0\" encoding=\"\"?>\n<!-- generator=\"wordpress/MU\" -->\n<rss version=\"2.0\"\n\txmlns:content=\"http://purl.org/rss/1.0/modules/content/\"\n\txmlns:wfw=\"http://wellformedweb.org/CommentAPI/\"\n\txmlns:dc=\"http://purl.org/dc/elements/1.1/\"\n\t><channel>\n\t<title>Nunublog</title>\n\t<link>http://nunubh.wordpress.com</link>\n\t<description>Just Newbie Blog!</description>\n\t<pubDate>Mon, 09 Jul 2007 18:54:09 +0000</pubDate>\n\t<generator>http://wordpress.org/?v=MU</generator>\n\t<language>id</language>\n\t\t\t<item>\n\t\t<ti"},
+    {"type": "text/html", "input": "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" lang=\"en\">\r\n<HEAD>\r\n<TITLE>Design*Sponge</TITLE><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\r\n<meta name=\"MSSmartTagsPreventParsing\" content=\"true\" />\r\n<meta name=\"generator\" content=\"Blogger\" />\r\n<link rel=\"alternate\" type=\"application/atom+xml\" title=\"Design*Sponge - Atom\" href=\"http://designsponge.blogspot.com/feeds/posts/default\" />\r\n<link rel=\"alternate\" type=\"application/rss+xml\" title=\"Design*Sponge - RSS\" href="},
+    {"type": "text/html", "input": "<HTML>\n<HEAD>\n<TITLE>Moved Temporarily</TITLE>\n</HEAD>\n<BODY BGCOLOR=\"#FFFFFF\" TEXT=\"#000000\">\n<H1>Moved Temporarily</H1>\nThe document has moved <A HREF=\"http://feeds.feedburner.com/thesecretdiaryofstevejobs\">here</A>.\n</BODY>\n</HTML>\n"}
+]
\ No newline at end of file
diff --git a/html5lib/tests/testdata/tokenizer/contentModelFlags.test b/html5lib/tests/testdata/tokenizer/contentModelFlags.test
new file mode 100644
index 00000000..a8b16958
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/contentModelFlags.test
@@ -0,0 +1,75 @@
+{"tests": [
+
+{"description":"PLAINTEXT content model flag",
+"initialStates":["PLAINTEXT state"],
+"lastStartTag":"plaintext",
+"input":"<head>&body;",
+"output":[["Character", "<head>&body;"]]},
+
+{"description":"End tag closing RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp>",
+"output":[["Character", "foo"], ["EndTag", "xmp"]]},
+
+{"description":"End tag closing RCDATA or RAWTEXT (case-insensitivity)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xMp>",
+"output":[["Character", "foo"], ["EndTag", "xmp"]]},
+
+{"description":"End tag closing RCDATA or RAWTEXT (ending with space)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp ",
+"output":[["Character", "foo"], "ParseError"]},
+
+{"description":"End tag closing RCDATA or RAWTEXT (ending with EOF)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp",
+"output":[["Character", "foo</xmp"]]},
+
+{"description":"End tag closing RCDATA or RAWTEXT (ending with slash)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp/",
+"output":[["Character", "foo"], "ParseError"]},
+
+{"description":"End tag not closing RCDATA or RAWTEXT (ending with left-angle-bracket)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp<",
+"output":[["Character", "foo</xmp<"]]},
+
+{"description":"End tag with incorrect name in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"</foo>bar</xmp>",
+"output":[["Character", "</foo>bar"], ["EndTag", "xmp"]]},
+
+{"description":"End tag with incorrect name in RCDATA or RAWTEXT (starting like correct name)",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"</foo>bar</xmpaar>",
+"output":[["Character", "</foo>bar</xmpaar>"]]},
+
+{"description":"End tag closing RCDATA or RAWTEXT, switching back to PCDATA",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo</xmp></baz>",
+"output":[["Character", "foo"], ["EndTag", "xmp"], ["EndTag", "baz"]]},
+
+{"description":"RAWTEXT w/ something looking like an entity",
+"initialStates":["RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"&foo;",
+"output":[["Character", "&foo;"]]},
+
+{"description":"RCDATA w/ an entity",
+"initialStates":["RCDATA state"],
+"lastStartTag":"textarea",
+"input":"&lt;",
+"output":[["Character", "<"]]}
+
+]}
diff --git a/html5lib/tests/testdata/tokenizer/domjs.test b/html5lib/tests/testdata/tokenizer/domjs.test
new file mode 100644
index 00000000..74771e23
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/domjs.test
@@ -0,0 +1,90 @@
+{
+    "tests": [
+        {
+            "description":"CR in bogus comment state",
+            "input":"<?\u000d",
+            "output":["ParseError", ["Comment", "?\u000a"]]
+        },
+        {
+            "description":"CRLF in bogus comment state",
+            "input":"<?\u000d\u000a",
+            "output":["ParseError", ["Comment", "?\u000a"]]
+        },
+        {
+            "description":"NUL in RCDATA and RAWTEXT",
+            "doubleEscaped":true,
+            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "input":"\\u0000",
+            "output":["ParseError", ["Character", "\\uFFFD"]]
+        },
+        {
+            "description":"skip first BOM but not later ones",
+            "input":"\uFEFFfoo\uFEFFbar",
+            "output":[["Character", "foo\uFEFFbar"]]
+        },
+        {
+            "description":"Non BMP-charref in in RCDATA",
+            "initialStates":["RCDATA state"],
+            "input":"&NotEqualTilde;",
+            "output":[["Character", "\u2242\u0338"]]
+        },
+        {
+            "description":"Bad charref in in RCDATA",
+            "initialStates":["RCDATA state"],
+            "input":"&NotEqualTild;",
+            "output":["ParseError", ["Character", "&NotEqualTild;"]]
+        },
+        {
+            "description":"lowercase endtags in RCDATA and RAWTEXT",
+            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "lastStartTag":"xmp",
+            "input":"</XMP>",
+            "output":[["EndTag","xmp"]]
+        },
+        {
+            "description":"bad endtag in RCDATA and RAWTEXT",
+            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "lastStartTag":"xmp",
+            "input":"</ XMP>",
+            "output":[["Character","</ XMP>"]]
+        },
+        {
+            "description":"bad endtag in RCDATA and RAWTEXT",
+            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "lastStartTag":"xmp",
+            "input":"</xm>",
+            "output":[["Character","</xm>"]]
+        },
+        {
+            "description":"bad endtag in RCDATA and RAWTEXT",
+            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "lastStartTag":"xmp",
+            "input":"</xm ",
+            "output":[["Character","</xm "]]
+        },
+        {
+            "description":"bad endtag in RCDATA and RAWTEXT",
+            "initialStates":["RCDATA state", "RAWTEXT state"],
+            "lastStartTag":"xmp",
+            "input":"</xm/",
+            "output":[["Character","</xm/"]]
+        },
+        {
+            "description":"Non BMP-charref in attribute",
+            "input":"<p id=\"&NotEqualTilde;\">",
+            "output":[["StartTag", "p", {"id":"\u2242\u0338"}]]
+        },
+        {
+            "description":"--!NUL in comment ",
+            "doubleEscaped":true,
+            "input":"<!----!\\u0000-->",
+            "output":["ParseError", ["Comment", "--!\\uFFFD"]]
+        },
+        {
+            "description":"space EOF after doctype ",
+            "input":"<!DOCTYPE html ",
+            "output":["ParseError", ["DOCTYPE", "html", null, null , false]]
+        }
+
+    ]
+}
diff --git a/html5lib/tests/testdata/tokenizer/entities.test b/html5lib/tests/testdata/tokenizer/entities.test
new file mode 100644
index 00000000..1cb17a76
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/entities.test
@@ -0,0 +1,283 @@
+{"tests": [
+
+{"description": "Undefined named entity in attribute value ending in semicolon and whose name starts with a known entity name.",
+"input":"<h a='&noti;'>",
+"output": ["ParseError", ["StartTag", "h", {"a": "&noti;"}]]},
+
+{"description": "Entity name followed by the equals sign in an attribute value.",
+"input":"<h a='&lang='>",
+"output": ["ParseError", ["StartTag", "h", {"a": "&lang="}]]},
+
+{"description": "CR as numeric entity",
+"input":"&#013;",
+"output": ["ParseError", ["Character", "\r"]]},
+
+{"description": "CR as hexadecimal numeric entity",
+"input":"&#x00D;",
+"output": ["ParseError", ["Character", "\r"]]},
+
+{"description": "Windows-1252 EURO SIGN numeric entity.",
+"input":"&#0128;",
+"output": ["ParseError", ["Character", "\u20AC"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0129;",
+"output": ["ParseError", ["Character", "\u0081"]]},
+
+{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.",
+"input":"&#0130;",
+"output": ["ParseError", ["Character", "\u201A"]]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.",
+"input":"&#0131;",
+"output": ["ParseError", ["Character", "\u0192"]]},
+
+{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.",
+"input":"&#0132;",
+"output": ["ParseError", ["Character", "\u201E"]]},
+
+{"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.",
+"input":"&#0133;",
+"output": ["ParseError", ["Character", "\u2026"]]},
+
+{"description": "Windows-1252 DAGGER numeric entity.",
+"input":"&#0134;",
+"output": ["ParseError", ["Character", "\u2020"]]},
+
+{"description": "Windows-1252 DOUBLE DAGGER numeric entity.",
+"input":"&#0135;",
+"output": ["ParseError", ["Character", "\u2021"]]},
+
+{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.",
+"input":"&#0136;",
+"output": ["ParseError", ["Character", "\u02C6"]]},
+
+{"description": "Windows-1252 PER MILLE SIGN numeric entity.",
+"input":"&#0137;",
+"output": ["ParseError", ["Character", "\u2030"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.",
+"input":"&#0138;",
+"output": ["ParseError", ["Character", "\u0160"]]},
+
+{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.",
+"input":"&#0139;",
+"output": ["ParseError", ["Character", "\u2039"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.",
+"input":"&#0140;",
+"output": ["ParseError", ["Character", "\u0152"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0141;",
+"output": ["ParseError", ["Character", "\u008D"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.",
+"input":"&#0142;",
+"output": ["ParseError", ["Character", "\u017D"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0143;",
+"output": ["ParseError", ["Character", "\u008F"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0144;",
+"output": ["ParseError", ["Character", "\u0090"]]},
+
+{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.",
+"input":"&#0145;",
+"output": ["ParseError", ["Character", "\u2018"]]},
+
+{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.",
+"input":"&#0146;",
+"output": ["ParseError", ["Character", "\u2019"]]},
+
+{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.",
+"input":"&#0147;",
+"output": ["ParseError", ["Character", "\u201C"]]},
+
+{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.",
+"input":"&#0148;",
+"output": ["ParseError", ["Character", "\u201D"]]},
+
+{"description": "Windows-1252 BULLET numeric entity.",
+"input":"&#0149;",
+"output": ["ParseError", ["Character", "\u2022"]]},
+
+{"description": "Windows-1252 EN DASH numeric entity.",
+"input":"&#0150;",
+"output": ["ParseError", ["Character", "\u2013"]]},
+
+{"description": "Windows-1252 EM DASH numeric entity.",
+"input":"&#0151;",
+"output": ["ParseError", ["Character", "\u2014"]]},
+
+{"description": "Windows-1252 SMALL TILDE numeric entity.",
+"input":"&#0152;",
+"output": ["ParseError", ["Character", "\u02DC"]]},
+
+{"description": "Windows-1252 TRADE MARK SIGN numeric entity.",
+"input":"&#0153;",
+"output": ["ParseError", ["Character", "\u2122"]]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.",
+"input":"&#0154;",
+"output": ["ParseError", ["Character", "\u0161"]]},
+
+{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.",
+"input":"&#0155;",
+"output": ["ParseError", ["Character", "\u203A"]]},
+
+{"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.",
+"input":"&#0156;",
+"output": ["ParseError", ["Character", "\u0153"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR numeric entity.",
+"input":"&#0157;",
+"output": ["ParseError", ["Character", "\u009D"]]},
+
+{"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.",
+"input":"&#x080;",
+"output": ["ParseError", ["Character", "\u20AC"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x081;",
+"output": ["ParseError", ["Character", "\u0081"]]},
+
+{"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x082;",
+"output": ["ParseError", ["Character", "\u201A"]]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.",
+"input":"&#x083;",
+"output": ["ParseError", ["Character", "\u0192"]]},
+
+{"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x084;",
+"output": ["ParseError", ["Character", "\u201E"]]},
+
+{"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.",
+"input":"&#x085;",
+"output": ["ParseError", ["Character", "\u2026"]]},
+
+{"description": "Windows-1252 DAGGER hexadecimal numeric entity.",
+"input":"&#x086;",
+"output": ["ParseError", ["Character", "\u2020"]]},
+
+{"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.",
+"input":"&#x087;",
+"output": ["ParseError", ["Character", "\u2021"]]},
+
+{"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.",
+"input":"&#x088;",
+"output": ["ParseError", ["Character", "\u02C6"]]},
+
+{"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.",
+"input":"&#x089;",
+"output": ["ParseError", ["Character", "\u2030"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.",
+"input":"&#x08A;",
+"output": ["ParseError", ["Character", "\u0160"]]},
+
+{"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x08B;",
+"output": ["ParseError", ["Character", "\u2039"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.",
+"input":"&#x08C;",
+"output": ["ParseError", ["Character", "\u0152"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x08D;",
+"output": ["ParseError", ["Character", "\u008D"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.",
+"input":"&#x08E;",
+"output": ["ParseError", ["Character", "\u017D"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x08F;",
+"output": ["ParseError", ["Character", "\u008F"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x090;",
+"output": ["ParseError", ["Character", "\u0090"]]},
+
+{"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x091;",
+"output": ["ParseError", ["Character", "\u2018"]]},
+
+{"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x092;",
+"output": ["ParseError", ["Character", "\u2019"]]},
+
+{"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x093;",
+"output": ["ParseError", ["Character", "\u201C"]]},
+
+{"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x094;",
+"output": ["ParseError", ["Character", "\u201D"]]},
+
+{"description": "Windows-1252 BULLET hexadecimal numeric entity.",
+"input":"&#x095;",
+"output": ["ParseError", ["Character", "\u2022"]]},
+
+{"description": "Windows-1252 EN DASH hexadecimal numeric entity.",
+"input":"&#x096;",
+"output": ["ParseError", ["Character", "\u2013"]]},
+
+{"description": "Windows-1252 EM DASH hexadecimal numeric entity.",
+"input":"&#x097;",
+"output": ["ParseError", ["Character", "\u2014"]]},
+
+{"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.",
+"input":"&#x098;",
+"output": ["ParseError", ["Character", "\u02DC"]]},
+
+{"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.",
+"input":"&#x099;",
+"output": ["ParseError", ["Character", "\u2122"]]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.",
+"input":"&#x09A;",
+"output": ["ParseError", ["Character", "\u0161"]]},
+
+{"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.",
+"input":"&#x09B;",
+"output": ["ParseError", ["Character", "\u203A"]]},
+
+{"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.",
+"input":"&#x09C;",
+"output": ["ParseError", ["Character", "\u0153"]]},
+
+{"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.",
+"input":"&#x09D;",
+"output": ["ParseError", ["Character", "\u009D"]]},
+
+{"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.",
+"input":"&#x09E;",
+"output": ["ParseError", ["Character", "\u017E"]]},
+
+{"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.",
+"input":"&#x09F;",
+"output": ["ParseError", ["Character", "\u0178"]]},
+
+{"description": "Decimal numeric entity followed by hex character a.",
+"input":"&#97a",
+"output": ["ParseError", ["Character", "aa"]]},
+
+{"description": "Decimal numeric entity followed by hex character A.",
+"input":"&#97A",
+"output": ["ParseError", ["Character", "aA"]]},
+
+{"description": "Decimal numeric entity followed by hex character f.",
+"input":"&#97f",
+"output": ["ParseError", ["Character", "af"]]},
+
+{"description": "Decimal numeric entity followed by hex character A.",
+"input":"&#97F",
+"output": ["ParseError", ["Character", "aF"]]}
+
+]}
diff --git a/html5lib/tests/testdata/tokenizer/escapeFlag.test b/html5lib/tests/testdata/tokenizer/escapeFlag.test
new file mode 100644
index 00000000..18cb4309
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/escapeFlag.test
@@ -0,0 +1,33 @@
+{"tests": [
+
+{"description":"Commented close tag in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo<!--</xmp>--></xmp>",
+"output":[["Character", "foo<!--"], ["EndTag", "xmp"], ["Character", "-->"], ["EndTag", "xmp"]]},
+
+{"description":"Bogus comment in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo<!-->baz</xmp>",
+"output":[["Character", "foo<!-->baz"], ["EndTag", "xmp"]]},
+
+{"description":"End tag surrounded by bogus comment in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo<!--></xmp><!-->baz</xmp>",
+"output":[["Character", "foo<!-->"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]]},
+
+{"description":"Commented entities in RCDATA",
+"initialStates":["RCDATA state"],
+"lastStartTag":"xmp",
+"input":" &amp; <!-- &amp; --> &amp; </xmp>",
+"output":[["Character", " & <!-- & --> & "], ["EndTag", "xmp"]]},
+
+{"description":"Incorrect comment ending sequences in RCDATA or RAWTEXT",
+"initialStates":["RCDATA state", "RAWTEXT state"],
+"lastStartTag":"xmp",
+"input":"foo<!-- x --x>x-- >x--!>x--<></xmp>",
+"output":[["Character", "foo<!-- x --x>x-- >x--!>x--<>"], ["EndTag", "xmp"]]}
+
+]}
diff --git a/html5lib/tests/testdata/tokenizer/namedEntities.test b/html5lib/tests/testdata/tokenizer/namedEntities.test
new file mode 100644
index 00000000..4a51c9c1
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/namedEntities.test
@@ -0,0 +1,44189 @@
+{
+    "tests": [
+        {
+            "input": "&AElig", 
+            "description": "Named entity: AElig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00c6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&AElig;", 
+            "description": "Named entity: AElig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&AMP", 
+            "description": "Named entity: AMP without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&"
+                ]
+            ]
+        }, 
+        {
+            "input": "&AMP;", 
+            "description": "Named entity: AMP; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "&"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Aacute", 
+            "description": "Named entity: Aacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00c1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Aacute;", 
+            "description": "Named entity: Aacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Abreve", 
+            "description": "Bad named entity: Abreve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Abreve"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Abreve;", 
+            "description": "Named entity: Abreve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0102"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Acirc", 
+            "description": "Named entity: Acirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Acirc;", 
+            "description": "Named entity: Acirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Acy", 
+            "description": "Bad named entity: Acy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Acy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Acy;", 
+            "description": "Named entity: Acy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0410"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Afr", 
+            "description": "Bad named entity: Afr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Afr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Afr;", 
+            "description": "Named entity: Afr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd04"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Agrave", 
+            "description": "Named entity: Agrave without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00c0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Agrave;", 
+            "description": "Named entity: Agrave; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Alpha", 
+            "description": "Bad named entity: Alpha without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Alpha"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Alpha;", 
+            "description": "Named entity: Alpha; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0391"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Amacr", 
+            "description": "Bad named entity: Amacr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Amacr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Amacr;", 
+            "description": "Named entity: Amacr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0100"
+                ]
+            ]
+        }, 
+        {
+            "input": "&And", 
+            "description": "Bad named entity: And without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&And"
+                ]
+            ]
+        }, 
+        {
+            "input": "&And;", 
+            "description": "Named entity: And; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a53"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Aogon", 
+            "description": "Bad named entity: Aogon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Aogon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Aogon;", 
+            "description": "Named entity: Aogon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0104"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Aopf", 
+            "description": "Bad named entity: Aopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Aopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Aopf;", 
+            "description": "Named entity: Aopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd38"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ApplyFunction", 
+            "description": "Bad named entity: ApplyFunction without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ApplyFunction"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ApplyFunction;", 
+            "description": "Named entity: ApplyFunction; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2061"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Aring", 
+            "description": "Named entity: Aring without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00c5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Aring;", 
+            "description": "Named entity: Aring; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ascr", 
+            "description": "Bad named entity: Ascr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ascr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ascr;", 
+            "description": "Named entity: Ascr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udc9c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Assign", 
+            "description": "Bad named entity: Assign without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Assign"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Assign;", 
+            "description": "Named entity: Assign; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2254"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Atilde", 
+            "description": "Named entity: Atilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Atilde;", 
+            "description": "Named entity: Atilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Auml", 
+            "description": "Named entity: Auml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00c4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Auml;", 
+            "description": "Named entity: Auml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Backslash", 
+            "description": "Bad named entity: Backslash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Backslash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Backslash;", 
+            "description": "Named entity: Backslash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2216"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Barv", 
+            "description": "Bad named entity: Barv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Barv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Barv;", 
+            "description": "Named entity: Barv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ae7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Barwed", 
+            "description": "Bad named entity: Barwed without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Barwed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Barwed;", 
+            "description": "Named entity: Barwed; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2306"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bcy", 
+            "description": "Bad named entity: Bcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Bcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bcy;", 
+            "description": "Named entity: Bcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0411"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Because", 
+            "description": "Bad named entity: Because without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Because"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Because;", 
+            "description": "Named entity: Because; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2235"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bernoullis", 
+            "description": "Bad named entity: Bernoullis without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Bernoullis"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bernoullis;", 
+            "description": "Named entity: Bernoullis; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u212c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Beta", 
+            "description": "Bad named entity: Beta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Beta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Beta;", 
+            "description": "Named entity: Beta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0392"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bfr", 
+            "description": "Bad named entity: Bfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Bfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bfr;", 
+            "description": "Named entity: Bfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd05"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bopf", 
+            "description": "Bad named entity: Bopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Bopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bopf;", 
+            "description": "Named entity: Bopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd39"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Breve", 
+            "description": "Bad named entity: Breve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Breve"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Breve;", 
+            "description": "Named entity: Breve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02d8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bscr", 
+            "description": "Bad named entity: Bscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Bscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bscr;", 
+            "description": "Named entity: Bscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u212c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bumpeq", 
+            "description": "Bad named entity: Bumpeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Bumpeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Bumpeq;", 
+            "description": "Named entity: Bumpeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CHcy", 
+            "description": "Bad named entity: CHcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CHcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CHcy;", 
+            "description": "Named entity: CHcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0427"
+                ]
+            ]
+        }, 
+        {
+            "input": "&COPY", 
+            "description": "Named entity: COPY without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&COPY;", 
+            "description": "Named entity: COPY; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cacute", 
+            "description": "Bad named entity: Cacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Cacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cacute;", 
+            "description": "Named entity: Cacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0106"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cap", 
+            "description": "Bad named entity: Cap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Cap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cap;", 
+            "description": "Named entity: Cap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CapitalDifferentialD", 
+            "description": "Bad named entity: CapitalDifferentialD without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CapitalDifferentialD"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CapitalDifferentialD;", 
+            "description": "Named entity: CapitalDifferentialD; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2145"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cayleys", 
+            "description": "Bad named entity: Cayleys without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Cayleys"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cayleys;", 
+            "description": "Named entity: Cayleys; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u212d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ccaron", 
+            "description": "Bad named entity: Ccaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ccaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ccaron;", 
+            "description": "Named entity: Ccaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u010c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ccedil", 
+            "description": "Named entity: Ccedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00c7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ccedil;", 
+            "description": "Named entity: Ccedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ccirc", 
+            "description": "Bad named entity: Ccirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ccirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ccirc;", 
+            "description": "Named entity: Ccirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0108"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cconint", 
+            "description": "Bad named entity: Cconint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Cconint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cconint;", 
+            "description": "Named entity: Cconint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2230"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cdot", 
+            "description": "Bad named entity: Cdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Cdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cdot;", 
+            "description": "Named entity: Cdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u010a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cedilla", 
+            "description": "Bad named entity: Cedilla without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Cedilla"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cedilla;", 
+            "description": "Named entity: Cedilla; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CenterDot", 
+            "description": "Bad named entity: CenterDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CenterDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CenterDot;", 
+            "description": "Named entity: CenterDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cfr", 
+            "description": "Bad named entity: Cfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Cfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cfr;", 
+            "description": "Named entity: Cfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u212d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Chi", 
+            "description": "Bad named entity: Chi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Chi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Chi;", 
+            "description": "Named entity: Chi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03a7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CircleDot", 
+            "description": "Bad named entity: CircleDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CircleDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CircleDot;", 
+            "description": "Named entity: CircleDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2299"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CircleMinus", 
+            "description": "Bad named entity: CircleMinus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CircleMinus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CircleMinus;", 
+            "description": "Named entity: CircleMinus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2296"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CirclePlus", 
+            "description": "Bad named entity: CirclePlus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CirclePlus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CirclePlus;", 
+            "description": "Named entity: CirclePlus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2295"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CircleTimes", 
+            "description": "Bad named entity: CircleTimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CircleTimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CircleTimes;", 
+            "description": "Named entity: CircleTimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2297"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ClockwiseContourIntegral", 
+            "description": "Bad named entity: ClockwiseContourIntegral without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ClockwiseContourIntegral"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ClockwiseContourIntegral;", 
+            "description": "Named entity: ClockwiseContourIntegral; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2232"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CloseCurlyDoubleQuote", 
+            "description": "Bad named entity: CloseCurlyDoubleQuote without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CloseCurlyDoubleQuote"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CloseCurlyDoubleQuote;", 
+            "description": "Named entity: CloseCurlyDoubleQuote; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u201d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CloseCurlyQuote", 
+            "description": "Bad named entity: CloseCurlyQuote without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CloseCurlyQuote"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CloseCurlyQuote;", 
+            "description": "Named entity: CloseCurlyQuote; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2019"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Colon", 
+            "description": "Bad named entity: Colon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Colon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Colon;", 
+            "description": "Named entity: Colon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2237"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Colone", 
+            "description": "Bad named entity: Colone without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Colone"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Colone;", 
+            "description": "Named entity: Colone; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a74"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Congruent", 
+            "description": "Bad named entity: Congruent without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Congruent"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Congruent;", 
+            "description": "Named entity: Congruent; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2261"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Conint", 
+            "description": "Bad named entity: Conint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Conint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Conint;", 
+            "description": "Named entity: Conint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ContourIntegral", 
+            "description": "Bad named entity: ContourIntegral without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ContourIntegral"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ContourIntegral;", 
+            "description": "Named entity: ContourIntegral; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Copf", 
+            "description": "Bad named entity: Copf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Copf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Copf;", 
+            "description": "Named entity: Copf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2102"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Coproduct", 
+            "description": "Bad named entity: Coproduct without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Coproduct"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Coproduct;", 
+            "description": "Named entity: Coproduct; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2210"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CounterClockwiseContourIntegral", 
+            "description": "Bad named entity: CounterClockwiseContourIntegral without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CounterClockwiseContourIntegral"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CounterClockwiseContourIntegral;", 
+            "description": "Named entity: CounterClockwiseContourIntegral; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2233"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cross", 
+            "description": "Bad named entity: Cross without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Cross"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cross;", 
+            "description": "Named entity: Cross; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a2f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cscr", 
+            "description": "Bad named entity: Cscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Cscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cscr;", 
+            "description": "Named entity: Cscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udc9e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cup", 
+            "description": "Bad named entity: Cup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Cup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Cup;", 
+            "description": "Named entity: Cup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CupCap", 
+            "description": "Bad named entity: CupCap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&CupCap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&CupCap;", 
+            "description": "Named entity: CupCap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DD", 
+            "description": "Bad named entity: DD without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DD"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DD;", 
+            "description": "Named entity: DD; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2145"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DDotrahd", 
+            "description": "Bad named entity: DDotrahd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DDotrahd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DDotrahd;", 
+            "description": "Named entity: DDotrahd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2911"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DJcy", 
+            "description": "Bad named entity: DJcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DJcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DJcy;", 
+            "description": "Named entity: DJcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0402"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DScy", 
+            "description": "Bad named entity: DScy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DScy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DScy;", 
+            "description": "Named entity: DScy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0405"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DZcy", 
+            "description": "Bad named entity: DZcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DZcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DZcy;", 
+            "description": "Named entity: DZcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u040f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dagger", 
+            "description": "Bad named entity: Dagger without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Dagger"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dagger;", 
+            "description": "Named entity: Dagger; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2021"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Darr", 
+            "description": "Bad named entity: Darr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Darr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Darr;", 
+            "description": "Named entity: Darr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dashv", 
+            "description": "Bad named entity: Dashv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Dashv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dashv;", 
+            "description": "Named entity: Dashv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ae4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dcaron", 
+            "description": "Bad named entity: Dcaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Dcaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dcaron;", 
+            "description": "Named entity: Dcaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u010e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dcy", 
+            "description": "Bad named entity: Dcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Dcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dcy;", 
+            "description": "Named entity: Dcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0414"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Del", 
+            "description": "Bad named entity: Del without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Del"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Del;", 
+            "description": "Named entity: Del; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2207"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Delta", 
+            "description": "Bad named entity: Delta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Delta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Delta;", 
+            "description": "Named entity: Delta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0394"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dfr", 
+            "description": "Bad named entity: Dfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Dfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dfr;", 
+            "description": "Named entity: Dfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd07"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DiacriticalAcute", 
+            "description": "Bad named entity: DiacriticalAcute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DiacriticalAcute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DiacriticalAcute;", 
+            "description": "Named entity: DiacriticalAcute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DiacriticalDot", 
+            "description": "Bad named entity: DiacriticalDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DiacriticalDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DiacriticalDot;", 
+            "description": "Named entity: DiacriticalDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02d9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DiacriticalDoubleAcute", 
+            "description": "Bad named entity: DiacriticalDoubleAcute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DiacriticalDoubleAcute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DiacriticalDoubleAcute;", 
+            "description": "Named entity: DiacriticalDoubleAcute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02dd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DiacriticalGrave", 
+            "description": "Bad named entity: DiacriticalGrave without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DiacriticalGrave"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DiacriticalGrave;", 
+            "description": "Named entity: DiacriticalGrave; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "`"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DiacriticalTilde", 
+            "description": "Bad named entity: DiacriticalTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DiacriticalTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DiacriticalTilde;", 
+            "description": "Named entity: DiacriticalTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02dc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Diamond", 
+            "description": "Bad named entity: Diamond without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Diamond"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Diamond;", 
+            "description": "Named entity: Diamond; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DifferentialD", 
+            "description": "Bad named entity: DifferentialD without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DifferentialD"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DifferentialD;", 
+            "description": "Named entity: DifferentialD; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2146"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dopf", 
+            "description": "Bad named entity: Dopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Dopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dopf;", 
+            "description": "Named entity: Dopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd3b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dot", 
+            "description": "Bad named entity: Dot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Dot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dot;", 
+            "description": "Named entity: Dot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DotDot", 
+            "description": "Bad named entity: DotDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DotDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DotDot;", 
+            "description": "Named entity: DotDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u20dc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DotEqual", 
+            "description": "Bad named entity: DotEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DotEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DotEqual;", 
+            "description": "Named entity: DotEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2250"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleContourIntegral", 
+            "description": "Bad named entity: DoubleContourIntegral without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleContourIntegral"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleContourIntegral;", 
+            "description": "Named entity: DoubleContourIntegral; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleDot", 
+            "description": "Bad named entity: DoubleDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleDot;", 
+            "description": "Named entity: DoubleDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleDownArrow", 
+            "description": "Bad named entity: DoubleDownArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleDownArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleDownArrow;", 
+            "description": "Named entity: DoubleDownArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLeftArrow", 
+            "description": "Bad named entity: DoubleLeftArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleLeftArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLeftArrow;", 
+            "description": "Named entity: DoubleLeftArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLeftRightArrow", 
+            "description": "Bad named entity: DoubleLeftRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleLeftRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLeftRightArrow;", 
+            "description": "Named entity: DoubleLeftRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLeftTee", 
+            "description": "Bad named entity: DoubleLeftTee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleLeftTee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLeftTee;", 
+            "description": "Named entity: DoubleLeftTee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ae4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLongLeftArrow", 
+            "description": "Bad named entity: DoubleLongLeftArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleLongLeftArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLongLeftArrow;", 
+            "description": "Named entity: DoubleLongLeftArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLongLeftRightArrow", 
+            "description": "Bad named entity: DoubleLongLeftRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleLongLeftRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLongLeftRightArrow;", 
+            "description": "Named entity: DoubleLongLeftRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27fa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLongRightArrow", 
+            "description": "Bad named entity: DoubleLongRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleLongRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleLongRightArrow;", 
+            "description": "Named entity: DoubleLongRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleRightArrow", 
+            "description": "Bad named entity: DoubleRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleRightArrow;", 
+            "description": "Named entity: DoubleRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleRightTee", 
+            "description": "Bad named entity: DoubleRightTee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleRightTee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleRightTee;", 
+            "description": "Named entity: DoubleRightTee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleUpArrow", 
+            "description": "Bad named entity: DoubleUpArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleUpArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleUpArrow;", 
+            "description": "Named entity: DoubleUpArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleUpDownArrow", 
+            "description": "Bad named entity: DoubleUpDownArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleUpDownArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleUpDownArrow;", 
+            "description": "Named entity: DoubleUpDownArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleVerticalBar", 
+            "description": "Bad named entity: DoubleVerticalBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DoubleVerticalBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DoubleVerticalBar;", 
+            "description": "Named entity: DoubleVerticalBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2225"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownArrow", 
+            "description": "Bad named entity: DownArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownArrow;", 
+            "description": "Named entity: DownArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2193"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownArrowBar", 
+            "description": "Bad named entity: DownArrowBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownArrowBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownArrowBar;", 
+            "description": "Named entity: DownArrowBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2913"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownArrowUpArrow", 
+            "description": "Bad named entity: DownArrowUpArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownArrowUpArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownArrowUpArrow;", 
+            "description": "Named entity: DownArrowUpArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownBreve", 
+            "description": "Bad named entity: DownBreve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownBreve"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownBreve;", 
+            "description": "Named entity: DownBreve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0311"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownLeftRightVector", 
+            "description": "Bad named entity: DownLeftRightVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownLeftRightVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownLeftRightVector;", 
+            "description": "Named entity: DownLeftRightVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2950"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownLeftTeeVector", 
+            "description": "Bad named entity: DownLeftTeeVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownLeftTeeVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownLeftTeeVector;", 
+            "description": "Named entity: DownLeftTeeVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u295e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownLeftVector", 
+            "description": "Bad named entity: DownLeftVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownLeftVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownLeftVector;", 
+            "description": "Named entity: DownLeftVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownLeftVectorBar", 
+            "description": "Bad named entity: DownLeftVectorBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownLeftVectorBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownLeftVectorBar;", 
+            "description": "Named entity: DownLeftVectorBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2956"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownRightTeeVector", 
+            "description": "Bad named entity: DownRightTeeVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownRightTeeVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownRightTeeVector;", 
+            "description": "Named entity: DownRightTeeVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u295f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownRightVector", 
+            "description": "Bad named entity: DownRightVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownRightVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownRightVector;", 
+            "description": "Named entity: DownRightVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownRightVectorBar", 
+            "description": "Bad named entity: DownRightVectorBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownRightVectorBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownRightVectorBar;", 
+            "description": "Named entity: DownRightVectorBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2957"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownTee", 
+            "description": "Bad named entity: DownTee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownTee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownTee;", 
+            "description": "Named entity: DownTee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownTeeArrow", 
+            "description": "Bad named entity: DownTeeArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&DownTeeArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&DownTeeArrow;", 
+            "description": "Named entity: DownTeeArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Downarrow", 
+            "description": "Bad named entity: Downarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Downarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Downarrow;", 
+            "description": "Named entity: Downarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dscr", 
+            "description": "Bad named entity: Dscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Dscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dscr;", 
+            "description": "Named entity: Dscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udc9f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dstrok", 
+            "description": "Bad named entity: Dstrok without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Dstrok"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Dstrok;", 
+            "description": "Named entity: Dstrok; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0110"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ENG", 
+            "description": "Bad named entity: ENG without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ENG"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ENG;", 
+            "description": "Named entity: ENG; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u014a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ETH", 
+            "description": "Named entity: ETH without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00d0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ETH;", 
+            "description": "Named entity: ETH; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00d0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Eacute", 
+            "description": "Named entity: Eacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00c9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Eacute;", 
+            "description": "Named entity: Eacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ecaron", 
+            "description": "Bad named entity: Ecaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ecaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ecaron;", 
+            "description": "Named entity: Ecaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u011a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ecirc", 
+            "description": "Named entity: Ecirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ca"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ecirc;", 
+            "description": "Named entity: Ecirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ca"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ecy", 
+            "description": "Bad named entity: Ecy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ecy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ecy;", 
+            "description": "Named entity: Ecy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u042d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Edot", 
+            "description": "Bad named entity: Edot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Edot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Edot;", 
+            "description": "Named entity: Edot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0116"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Efr", 
+            "description": "Bad named entity: Efr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Efr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Efr;", 
+            "description": "Named entity: Efr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd08"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Egrave", 
+            "description": "Named entity: Egrave without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00c8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Egrave;", 
+            "description": "Named entity: Egrave; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Element", 
+            "description": "Bad named entity: Element without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Element"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Element;", 
+            "description": "Named entity: Element; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2208"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Emacr", 
+            "description": "Bad named entity: Emacr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Emacr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Emacr;", 
+            "description": "Named entity: Emacr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0112"
+                ]
+            ]
+        }, 
+        {
+            "input": "&EmptySmallSquare", 
+            "description": "Bad named entity: EmptySmallSquare without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&EmptySmallSquare"
+                ]
+            ]
+        }, 
+        {
+            "input": "&EmptySmallSquare;", 
+            "description": "Named entity: EmptySmallSquare; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25fb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&EmptyVerySmallSquare", 
+            "description": "Bad named entity: EmptyVerySmallSquare without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&EmptyVerySmallSquare"
+                ]
+            ]
+        }, 
+        {
+            "input": "&EmptyVerySmallSquare;", 
+            "description": "Named entity: EmptyVerySmallSquare; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25ab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Eogon", 
+            "description": "Bad named entity: Eogon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Eogon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Eogon;", 
+            "description": "Named entity: Eogon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0118"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Eopf", 
+            "description": "Bad named entity: Eopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Eopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Eopf;", 
+            "description": "Named entity: Eopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd3c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Epsilon", 
+            "description": "Bad named entity: Epsilon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Epsilon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Epsilon;", 
+            "description": "Named entity: Epsilon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0395"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Equal", 
+            "description": "Bad named entity: Equal without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Equal"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Equal;", 
+            "description": "Named entity: Equal; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a75"
+                ]
+            ]
+        }, 
+        {
+            "input": "&EqualTilde", 
+            "description": "Bad named entity: EqualTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&EqualTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&EqualTilde;", 
+            "description": "Named entity: EqualTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2242"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Equilibrium", 
+            "description": "Bad named entity: Equilibrium without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Equilibrium"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Equilibrium;", 
+            "description": "Named entity: Equilibrium; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21cc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Escr", 
+            "description": "Bad named entity: Escr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Escr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Escr;", 
+            "description": "Named entity: Escr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2130"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Esim", 
+            "description": "Bad named entity: Esim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Esim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Esim;", 
+            "description": "Named entity: Esim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a73"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Eta", 
+            "description": "Bad named entity: Eta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Eta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Eta;", 
+            "description": "Named entity: Eta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0397"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Euml", 
+            "description": "Named entity: Euml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00cb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Euml;", 
+            "description": "Named entity: Euml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00cb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Exists", 
+            "description": "Bad named entity: Exists without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Exists"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Exists;", 
+            "description": "Named entity: Exists; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2203"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ExponentialE", 
+            "description": "Bad named entity: ExponentialE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ExponentialE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ExponentialE;", 
+            "description": "Named entity: ExponentialE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2147"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Fcy", 
+            "description": "Bad named entity: Fcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Fcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Fcy;", 
+            "description": "Named entity: Fcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0424"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ffr", 
+            "description": "Bad named entity: Ffr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ffr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ffr;", 
+            "description": "Named entity: Ffr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd09"
+                ]
+            ]
+        }, 
+        {
+            "input": "&FilledSmallSquare", 
+            "description": "Bad named entity: FilledSmallSquare without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&FilledSmallSquare"
+                ]
+            ]
+        }, 
+        {
+            "input": "&FilledSmallSquare;", 
+            "description": "Named entity: FilledSmallSquare; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25fc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&FilledVerySmallSquare", 
+            "description": "Bad named entity: FilledVerySmallSquare without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&FilledVerySmallSquare"
+                ]
+            ]
+        }, 
+        {
+            "input": "&FilledVerySmallSquare;", 
+            "description": "Named entity: FilledVerySmallSquare; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25aa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Fopf", 
+            "description": "Bad named entity: Fopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Fopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Fopf;", 
+            "description": "Named entity: Fopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd3d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ForAll", 
+            "description": "Bad named entity: ForAll without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ForAll"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ForAll;", 
+            "description": "Named entity: ForAll; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2200"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Fouriertrf", 
+            "description": "Bad named entity: Fouriertrf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Fouriertrf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Fouriertrf;", 
+            "description": "Named entity: Fouriertrf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2131"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Fscr", 
+            "description": "Bad named entity: Fscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Fscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Fscr;", 
+            "description": "Named entity: Fscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2131"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GJcy", 
+            "description": "Bad named entity: GJcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&GJcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GJcy;", 
+            "description": "Named entity: GJcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0403"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GT", 
+            "description": "Named entity: GT without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    ">"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GT;", 
+            "description": "Named entity: GT; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    ">"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gamma", 
+            "description": "Bad named entity: Gamma without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gamma"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gamma;", 
+            "description": "Named entity: Gamma; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0393"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gammad", 
+            "description": "Bad named entity: Gammad without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gammad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gammad;", 
+            "description": "Named entity: Gammad; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03dc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gbreve", 
+            "description": "Bad named entity: Gbreve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gbreve"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gbreve;", 
+            "description": "Named entity: Gbreve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u011e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gcedil", 
+            "description": "Bad named entity: Gcedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gcedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gcedil;", 
+            "description": "Named entity: Gcedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0122"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gcirc", 
+            "description": "Bad named entity: Gcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gcirc;", 
+            "description": "Named entity: Gcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u011c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gcy", 
+            "description": "Bad named entity: Gcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gcy;", 
+            "description": "Named entity: Gcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0413"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gdot", 
+            "description": "Bad named entity: Gdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gdot;", 
+            "description": "Named entity: Gdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0120"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gfr", 
+            "description": "Bad named entity: Gfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gfr;", 
+            "description": "Named entity: Gfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd0a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gg", 
+            "description": "Bad named entity: Gg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gg;", 
+            "description": "Named entity: Gg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gopf", 
+            "description": "Bad named entity: Gopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gopf;", 
+            "description": "Named entity: Gopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd3e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterEqual", 
+            "description": "Bad named entity: GreaterEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&GreaterEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterEqual;", 
+            "description": "Named entity: GreaterEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2265"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterEqualLess", 
+            "description": "Bad named entity: GreaterEqualLess without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&GreaterEqualLess"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterEqualLess;", 
+            "description": "Named entity: GreaterEqualLess; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22db"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterFullEqual", 
+            "description": "Bad named entity: GreaterFullEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&GreaterFullEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterFullEqual;", 
+            "description": "Named entity: GreaterFullEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2267"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterGreater", 
+            "description": "Bad named entity: GreaterGreater without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&GreaterGreater"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterGreater;", 
+            "description": "Named entity: GreaterGreater; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterLess", 
+            "description": "Bad named entity: GreaterLess without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&GreaterLess"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterLess;", 
+            "description": "Named entity: GreaterLess; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2277"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterSlantEqual", 
+            "description": "Bad named entity: GreaterSlantEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&GreaterSlantEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterSlantEqual;", 
+            "description": "Named entity: GreaterSlantEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterTilde", 
+            "description": "Bad named entity: GreaterTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&GreaterTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&GreaterTilde;", 
+            "description": "Named entity: GreaterTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2273"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gscr", 
+            "description": "Bad named entity: Gscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gscr;", 
+            "description": "Named entity: Gscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udca2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gt", 
+            "description": "Bad named entity: Gt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Gt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Gt;", 
+            "description": "Named entity: Gt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&HARDcy", 
+            "description": "Bad named entity: HARDcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&HARDcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&HARDcy;", 
+            "description": "Named entity: HARDcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u042a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hacek", 
+            "description": "Bad named entity: Hacek without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Hacek"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hacek;", 
+            "description": "Named entity: Hacek; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02c7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hat", 
+            "description": "Bad named entity: Hat without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Hat"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hat;", 
+            "description": "Named entity: Hat; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "^"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hcirc", 
+            "description": "Bad named entity: Hcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Hcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hcirc;", 
+            "description": "Named entity: Hcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0124"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hfr", 
+            "description": "Bad named entity: Hfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Hfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hfr;", 
+            "description": "Named entity: Hfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&HilbertSpace", 
+            "description": "Bad named entity: HilbertSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&HilbertSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&HilbertSpace;", 
+            "description": "Named entity: HilbertSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hopf", 
+            "description": "Bad named entity: Hopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Hopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hopf;", 
+            "description": "Named entity: Hopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&HorizontalLine", 
+            "description": "Bad named entity: HorizontalLine without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&HorizontalLine"
+                ]
+            ]
+        }, 
+        {
+            "input": "&HorizontalLine;", 
+            "description": "Named entity: HorizontalLine; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2500"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hscr", 
+            "description": "Bad named entity: Hscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Hscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hscr;", 
+            "description": "Named entity: Hscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hstrok", 
+            "description": "Bad named entity: Hstrok without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Hstrok"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Hstrok;", 
+            "description": "Named entity: Hstrok; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0126"
+                ]
+            ]
+        }, 
+        {
+            "input": "&HumpDownHump", 
+            "description": "Bad named entity: HumpDownHump without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&HumpDownHump"
+                ]
+            ]
+        }, 
+        {
+            "input": "&HumpDownHump;", 
+            "description": "Named entity: HumpDownHump; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&HumpEqual", 
+            "description": "Bad named entity: HumpEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&HumpEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&HumpEqual;", 
+            "description": "Named entity: HumpEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&IEcy", 
+            "description": "Bad named entity: IEcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&IEcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&IEcy;", 
+            "description": "Named entity: IEcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0415"
+                ]
+            ]
+        }, 
+        {
+            "input": "&IJlig", 
+            "description": "Bad named entity: IJlig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&IJlig"
+                ]
+            ]
+        }, 
+        {
+            "input": "&IJlig;", 
+            "description": "Named entity: IJlig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0132"
+                ]
+            ]
+        }, 
+        {
+            "input": "&IOcy", 
+            "description": "Bad named entity: IOcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&IOcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&IOcy;", 
+            "description": "Named entity: IOcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0401"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iacute", 
+            "description": "Named entity: Iacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00cd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iacute;", 
+            "description": "Named entity: Iacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00cd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Icirc", 
+            "description": "Named entity: Icirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ce"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Icirc;", 
+            "description": "Named entity: Icirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ce"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Icy", 
+            "description": "Bad named entity: Icy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Icy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Icy;", 
+            "description": "Named entity: Icy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0418"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Idot", 
+            "description": "Bad named entity: Idot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Idot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Idot;", 
+            "description": "Named entity: Idot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0130"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ifr", 
+            "description": "Bad named entity: Ifr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ifr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ifr;", 
+            "description": "Named entity: Ifr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2111"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Igrave", 
+            "description": "Named entity: Igrave without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00cc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Igrave;", 
+            "description": "Named entity: Igrave; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00cc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Im", 
+            "description": "Bad named entity: Im without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Im"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Im;", 
+            "description": "Named entity: Im; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2111"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Imacr", 
+            "description": "Bad named entity: Imacr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Imacr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Imacr;", 
+            "description": "Named entity: Imacr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u012a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ImaginaryI", 
+            "description": "Bad named entity: ImaginaryI without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ImaginaryI"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ImaginaryI;", 
+            "description": "Named entity: ImaginaryI; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2148"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Implies", 
+            "description": "Bad named entity: Implies without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Implies"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Implies;", 
+            "description": "Named entity: Implies; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Int", 
+            "description": "Bad named entity: Int without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Int"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Int;", 
+            "description": "Named entity: Int; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Integral", 
+            "description": "Bad named entity: Integral without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Integral"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Integral;", 
+            "description": "Named entity: Integral; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Intersection", 
+            "description": "Bad named entity: Intersection without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Intersection"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Intersection;", 
+            "description": "Named entity: Intersection; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&InvisibleComma", 
+            "description": "Bad named entity: InvisibleComma without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&InvisibleComma"
+                ]
+            ]
+        }, 
+        {
+            "input": "&InvisibleComma;", 
+            "description": "Named entity: InvisibleComma; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2063"
+                ]
+            ]
+        }, 
+        {
+            "input": "&InvisibleTimes", 
+            "description": "Bad named entity: InvisibleTimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&InvisibleTimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&InvisibleTimes;", 
+            "description": "Named entity: InvisibleTimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2062"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iogon", 
+            "description": "Bad named entity: Iogon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Iogon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iogon;", 
+            "description": "Named entity: Iogon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u012e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iopf", 
+            "description": "Bad named entity: Iopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Iopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iopf;", 
+            "description": "Named entity: Iopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd40"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iota", 
+            "description": "Bad named entity: Iota without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Iota"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iota;", 
+            "description": "Named entity: Iota; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0399"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iscr", 
+            "description": "Bad named entity: Iscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Iscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iscr;", 
+            "description": "Named entity: Iscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2110"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Itilde", 
+            "description": "Bad named entity: Itilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Itilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Itilde;", 
+            "description": "Named entity: Itilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0128"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iukcy", 
+            "description": "Bad named entity: Iukcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Iukcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iukcy;", 
+            "description": "Named entity: Iukcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0406"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iuml", 
+            "description": "Named entity: Iuml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00cf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Iuml;", 
+            "description": "Named entity: Iuml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00cf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jcirc", 
+            "description": "Bad named entity: Jcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Jcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jcirc;", 
+            "description": "Named entity: Jcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0134"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jcy", 
+            "description": "Bad named entity: Jcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Jcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jcy;", 
+            "description": "Named entity: Jcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0419"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jfr", 
+            "description": "Bad named entity: Jfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Jfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jfr;", 
+            "description": "Named entity: Jfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd0d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jopf", 
+            "description": "Bad named entity: Jopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Jopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jopf;", 
+            "description": "Named entity: Jopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd41"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jscr", 
+            "description": "Bad named entity: Jscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Jscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jscr;", 
+            "description": "Named entity: Jscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udca5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jsercy", 
+            "description": "Bad named entity: Jsercy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Jsercy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jsercy;", 
+            "description": "Named entity: Jsercy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0408"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jukcy", 
+            "description": "Bad named entity: Jukcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Jukcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Jukcy;", 
+            "description": "Named entity: Jukcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0404"
+                ]
+            ]
+        }, 
+        {
+            "input": "&KHcy", 
+            "description": "Bad named entity: KHcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&KHcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&KHcy;", 
+            "description": "Named entity: KHcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0425"
+                ]
+            ]
+        }, 
+        {
+            "input": "&KJcy", 
+            "description": "Bad named entity: KJcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&KJcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&KJcy;", 
+            "description": "Named entity: KJcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u040c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kappa", 
+            "description": "Bad named entity: Kappa without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Kappa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kappa;", 
+            "description": "Named entity: Kappa; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u039a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kcedil", 
+            "description": "Bad named entity: Kcedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Kcedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kcedil;", 
+            "description": "Named entity: Kcedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0136"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kcy", 
+            "description": "Bad named entity: Kcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Kcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kcy;", 
+            "description": "Named entity: Kcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u041a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kfr", 
+            "description": "Bad named entity: Kfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Kfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kfr;", 
+            "description": "Named entity: Kfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd0e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kopf", 
+            "description": "Bad named entity: Kopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Kopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kopf;", 
+            "description": "Named entity: Kopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd42"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kscr", 
+            "description": "Bad named entity: Kscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Kscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Kscr;", 
+            "description": "Named entity: Kscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udca6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LJcy", 
+            "description": "Bad named entity: LJcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LJcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LJcy;", 
+            "description": "Named entity: LJcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0409"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LT", 
+            "description": "Named entity: LT without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "<"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LT;", 
+            "description": "Named entity: LT; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "<"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lacute", 
+            "description": "Bad named entity: Lacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lacute;", 
+            "description": "Named entity: Lacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0139"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lambda", 
+            "description": "Bad named entity: Lambda without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lambda"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lambda;", 
+            "description": "Named entity: Lambda; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u039b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lang", 
+            "description": "Bad named entity: Lang without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lang"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lang;", 
+            "description": "Named entity: Lang; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27ea"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Laplacetrf", 
+            "description": "Bad named entity: Laplacetrf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Laplacetrf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Laplacetrf;", 
+            "description": "Named entity: Laplacetrf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2112"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Larr", 
+            "description": "Bad named entity: Larr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Larr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Larr;", 
+            "description": "Named entity: Larr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u219e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lcaron", 
+            "description": "Bad named entity: Lcaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lcaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lcaron;", 
+            "description": "Named entity: Lcaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u013d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lcedil", 
+            "description": "Bad named entity: Lcedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lcedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lcedil;", 
+            "description": "Named entity: Lcedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u013b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lcy", 
+            "description": "Bad named entity: Lcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lcy;", 
+            "description": "Named entity: Lcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u041b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftAngleBracket", 
+            "description": "Bad named entity: LeftAngleBracket without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftAngleBracket"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftAngleBracket;", 
+            "description": "Named entity: LeftAngleBracket; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27e8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftArrow", 
+            "description": "Bad named entity: LeftArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftArrow;", 
+            "description": "Named entity: LeftArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2190"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftArrowBar", 
+            "description": "Bad named entity: LeftArrowBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftArrowBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftArrowBar;", 
+            "description": "Named entity: LeftArrowBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21e4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftArrowRightArrow", 
+            "description": "Bad named entity: LeftArrowRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftArrowRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftArrowRightArrow;", 
+            "description": "Named entity: LeftArrowRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftCeiling", 
+            "description": "Bad named entity: LeftCeiling without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftCeiling"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftCeiling;", 
+            "description": "Named entity: LeftCeiling; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2308"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftDoubleBracket", 
+            "description": "Bad named entity: LeftDoubleBracket without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftDoubleBracket"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftDoubleBracket;", 
+            "description": "Named entity: LeftDoubleBracket; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27e6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftDownTeeVector", 
+            "description": "Bad named entity: LeftDownTeeVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftDownTeeVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftDownTeeVector;", 
+            "description": "Named entity: LeftDownTeeVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2961"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftDownVector", 
+            "description": "Bad named entity: LeftDownVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftDownVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftDownVector;", 
+            "description": "Named entity: LeftDownVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftDownVectorBar", 
+            "description": "Bad named entity: LeftDownVectorBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftDownVectorBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftDownVectorBar;", 
+            "description": "Named entity: LeftDownVectorBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2959"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftFloor", 
+            "description": "Bad named entity: LeftFloor without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftFloor"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftFloor;", 
+            "description": "Named entity: LeftFloor; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u230a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftRightArrow", 
+            "description": "Bad named entity: LeftRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftRightArrow;", 
+            "description": "Named entity: LeftRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2194"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftRightVector", 
+            "description": "Bad named entity: LeftRightVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftRightVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftRightVector;", 
+            "description": "Named entity: LeftRightVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u294e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTee", 
+            "description": "Bad named entity: LeftTee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftTee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTee;", 
+            "description": "Named entity: LeftTee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTeeArrow", 
+            "description": "Bad named entity: LeftTeeArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftTeeArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTeeArrow;", 
+            "description": "Named entity: LeftTeeArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTeeVector", 
+            "description": "Bad named entity: LeftTeeVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftTeeVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTeeVector;", 
+            "description": "Named entity: LeftTeeVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u295a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTriangle", 
+            "description": "Bad named entity: LeftTriangle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftTriangle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTriangle;", 
+            "description": "Named entity: LeftTriangle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTriangleBar", 
+            "description": "Bad named entity: LeftTriangleBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftTriangleBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTriangleBar;", 
+            "description": "Named entity: LeftTriangleBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29cf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTriangleEqual", 
+            "description": "Bad named entity: LeftTriangleEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftTriangleEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftTriangleEqual;", 
+            "description": "Named entity: LeftTriangleEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftUpDownVector", 
+            "description": "Bad named entity: LeftUpDownVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftUpDownVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftUpDownVector;", 
+            "description": "Named entity: LeftUpDownVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2951"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftUpTeeVector", 
+            "description": "Bad named entity: LeftUpTeeVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftUpTeeVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftUpTeeVector;", 
+            "description": "Named entity: LeftUpTeeVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2960"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftUpVector", 
+            "description": "Bad named entity: LeftUpVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftUpVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftUpVector;", 
+            "description": "Named entity: LeftUpVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftUpVectorBar", 
+            "description": "Bad named entity: LeftUpVectorBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftUpVectorBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftUpVectorBar;", 
+            "description": "Named entity: LeftUpVectorBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2958"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftVector", 
+            "description": "Bad named entity: LeftVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftVector;", 
+            "description": "Named entity: LeftVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftVectorBar", 
+            "description": "Bad named entity: LeftVectorBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LeftVectorBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LeftVectorBar;", 
+            "description": "Named entity: LeftVectorBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2952"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Leftarrow", 
+            "description": "Bad named entity: Leftarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Leftarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Leftarrow;", 
+            "description": "Named entity: Leftarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Leftrightarrow", 
+            "description": "Bad named entity: Leftrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Leftrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Leftrightarrow;", 
+            "description": "Named entity: Leftrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessEqualGreater", 
+            "description": "Bad named entity: LessEqualGreater without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LessEqualGreater"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessEqualGreater;", 
+            "description": "Named entity: LessEqualGreater; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22da"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessFullEqual", 
+            "description": "Bad named entity: LessFullEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LessFullEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessFullEqual;", 
+            "description": "Named entity: LessFullEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2266"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessGreater", 
+            "description": "Bad named entity: LessGreater without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LessGreater"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessGreater;", 
+            "description": "Named entity: LessGreater; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2276"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessLess", 
+            "description": "Bad named entity: LessLess without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LessLess"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessLess;", 
+            "description": "Named entity: LessLess; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessSlantEqual", 
+            "description": "Bad named entity: LessSlantEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LessSlantEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessSlantEqual;", 
+            "description": "Named entity: LessSlantEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessTilde", 
+            "description": "Bad named entity: LessTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LessTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LessTilde;", 
+            "description": "Named entity: LessTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2272"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lfr", 
+            "description": "Bad named entity: Lfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lfr;", 
+            "description": "Named entity: Lfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd0f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ll", 
+            "description": "Bad named entity: Ll without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ll"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ll;", 
+            "description": "Named entity: Ll; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lleftarrow", 
+            "description": "Bad named entity: Lleftarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lleftarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lleftarrow;", 
+            "description": "Named entity: Lleftarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21da"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lmidot", 
+            "description": "Bad named entity: Lmidot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lmidot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lmidot;", 
+            "description": "Named entity: Lmidot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u013f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LongLeftArrow", 
+            "description": "Bad named entity: LongLeftArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LongLeftArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LongLeftArrow;", 
+            "description": "Named entity: LongLeftArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LongLeftRightArrow", 
+            "description": "Bad named entity: LongLeftRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LongLeftRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LongLeftRightArrow;", 
+            "description": "Named entity: LongLeftRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LongRightArrow", 
+            "description": "Bad named entity: LongRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LongRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LongRightArrow;", 
+            "description": "Named entity: LongRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Longleftarrow", 
+            "description": "Bad named entity: Longleftarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Longleftarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Longleftarrow;", 
+            "description": "Named entity: Longleftarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Longleftrightarrow", 
+            "description": "Bad named entity: Longleftrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Longleftrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Longleftrightarrow;", 
+            "description": "Named entity: Longleftrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27fa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Longrightarrow", 
+            "description": "Bad named entity: Longrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Longrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Longrightarrow;", 
+            "description": "Named entity: Longrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lopf", 
+            "description": "Bad named entity: Lopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lopf;", 
+            "description": "Named entity: Lopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd43"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LowerLeftArrow", 
+            "description": "Bad named entity: LowerLeftArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LowerLeftArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LowerLeftArrow;", 
+            "description": "Named entity: LowerLeftArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2199"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LowerRightArrow", 
+            "description": "Bad named entity: LowerRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&LowerRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&LowerRightArrow;", 
+            "description": "Named entity: LowerRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2198"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lscr", 
+            "description": "Bad named entity: Lscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lscr;", 
+            "description": "Named entity: Lscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2112"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lsh", 
+            "description": "Bad named entity: Lsh without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lsh"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lsh;", 
+            "description": "Named entity: Lsh; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lstrok", 
+            "description": "Bad named entity: Lstrok without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lstrok"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lstrok;", 
+            "description": "Named entity: Lstrok; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0141"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lt", 
+            "description": "Bad named entity: Lt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Lt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Lt;", 
+            "description": "Named entity: Lt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Map", 
+            "description": "Bad named entity: Map without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Map"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Map;", 
+            "description": "Named entity: Map; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2905"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mcy", 
+            "description": "Bad named entity: Mcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Mcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mcy;", 
+            "description": "Named entity: Mcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u041c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&MediumSpace", 
+            "description": "Bad named entity: MediumSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&MediumSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&MediumSpace;", 
+            "description": "Named entity: MediumSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u205f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mellintrf", 
+            "description": "Bad named entity: Mellintrf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Mellintrf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mellintrf;", 
+            "description": "Named entity: Mellintrf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2133"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mfr", 
+            "description": "Bad named entity: Mfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Mfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mfr;", 
+            "description": "Named entity: Mfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd10"
+                ]
+            ]
+        }, 
+        {
+            "input": "&MinusPlus", 
+            "description": "Bad named entity: MinusPlus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&MinusPlus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&MinusPlus;", 
+            "description": "Named entity: MinusPlus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2213"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mopf", 
+            "description": "Bad named entity: Mopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Mopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mopf;", 
+            "description": "Named entity: Mopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd44"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mscr", 
+            "description": "Bad named entity: Mscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Mscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mscr;", 
+            "description": "Named entity: Mscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2133"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mu", 
+            "description": "Bad named entity: Mu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Mu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Mu;", 
+            "description": "Named entity: Mu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u039c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NJcy", 
+            "description": "Bad named entity: NJcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NJcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NJcy;", 
+            "description": "Named entity: NJcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u040a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Nacute", 
+            "description": "Bad named entity: Nacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Nacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Nacute;", 
+            "description": "Named entity: Nacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0143"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ncaron", 
+            "description": "Bad named entity: Ncaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ncaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ncaron;", 
+            "description": "Named entity: Ncaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0147"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ncedil", 
+            "description": "Bad named entity: Ncedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ncedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ncedil;", 
+            "description": "Named entity: Ncedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0145"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ncy", 
+            "description": "Bad named entity: Ncy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ncy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ncy;", 
+            "description": "Named entity: Ncy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u041d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NegativeMediumSpace", 
+            "description": "Bad named entity: NegativeMediumSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NegativeMediumSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NegativeMediumSpace;", 
+            "description": "Named entity: NegativeMediumSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NegativeThickSpace", 
+            "description": "Bad named entity: NegativeThickSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NegativeThickSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NegativeThickSpace;", 
+            "description": "Named entity: NegativeThickSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NegativeThinSpace", 
+            "description": "Bad named entity: NegativeThinSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NegativeThinSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NegativeThinSpace;", 
+            "description": "Named entity: NegativeThinSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NegativeVeryThinSpace", 
+            "description": "Bad named entity: NegativeVeryThinSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NegativeVeryThinSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NegativeVeryThinSpace;", 
+            "description": "Named entity: NegativeVeryThinSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NestedGreaterGreater", 
+            "description": "Bad named entity: NestedGreaterGreater without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NestedGreaterGreater"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NestedGreaterGreater;", 
+            "description": "Named entity: NestedGreaterGreater; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NestedLessLess", 
+            "description": "Bad named entity: NestedLessLess without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NestedLessLess"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NestedLessLess;", 
+            "description": "Named entity: NestedLessLess; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NewLine", 
+            "description": "Bad named entity: NewLine without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NewLine"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NewLine;", 
+            "description": "Named entity: NewLine; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\n"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Nfr", 
+            "description": "Bad named entity: Nfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Nfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Nfr;", 
+            "description": "Named entity: Nfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd11"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NoBreak", 
+            "description": "Bad named entity: NoBreak without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NoBreak"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NoBreak;", 
+            "description": "Named entity: NoBreak; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2060"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NonBreakingSpace", 
+            "description": "Bad named entity: NonBreakingSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NonBreakingSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NonBreakingSpace;", 
+            "description": "Named entity: NonBreakingSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Nopf", 
+            "description": "Bad named entity: Nopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Nopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Nopf;", 
+            "description": "Named entity: Nopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2115"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Not", 
+            "description": "Bad named entity: Not without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Not"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Not;", 
+            "description": "Named entity: Not; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotCongruent", 
+            "description": "Bad named entity: NotCongruent without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotCongruent"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotCongruent;", 
+            "description": "Named entity: NotCongruent; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2262"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotCupCap", 
+            "description": "Bad named entity: NotCupCap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotCupCap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotCupCap;", 
+            "description": "Named entity: NotCupCap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotDoubleVerticalBar", 
+            "description": "Bad named entity: NotDoubleVerticalBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotDoubleVerticalBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotDoubleVerticalBar;", 
+            "description": "Named entity: NotDoubleVerticalBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2226"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotElement", 
+            "description": "Bad named entity: NotElement without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotElement"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotElement;", 
+            "description": "Named entity: NotElement; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2209"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotEqual", 
+            "description": "Bad named entity: NotEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotEqual;", 
+            "description": "Named entity: NotEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2260"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotEqualTilde", 
+            "description": "Bad named entity: NotEqualTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotEqualTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotEqualTilde;", 
+            "description": "Named entity: NotEqualTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2242\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotExists", 
+            "description": "Bad named entity: NotExists without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotExists"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotExists;", 
+            "description": "Named entity: NotExists; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2204"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreater", 
+            "description": "Bad named entity: NotGreater without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotGreater"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreater;", 
+            "description": "Named entity: NotGreater; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterEqual", 
+            "description": "Bad named entity: NotGreaterEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotGreaterEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterEqual;", 
+            "description": "Named entity: NotGreaterEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2271"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterFullEqual", 
+            "description": "Bad named entity: NotGreaterFullEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotGreaterFullEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterFullEqual;", 
+            "description": "Named entity: NotGreaterFullEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2267\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterGreater", 
+            "description": "Bad named entity: NotGreaterGreater without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotGreaterGreater"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterGreater;", 
+            "description": "Named entity: NotGreaterGreater; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226b\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterLess", 
+            "description": "Bad named entity: NotGreaterLess without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotGreaterLess"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterLess;", 
+            "description": "Named entity: NotGreaterLess; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2279"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterSlantEqual", 
+            "description": "Bad named entity: NotGreaterSlantEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotGreaterSlantEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterSlantEqual;", 
+            "description": "Named entity: NotGreaterSlantEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7e\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterTilde", 
+            "description": "Bad named entity: NotGreaterTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotGreaterTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotGreaterTilde;", 
+            "description": "Named entity: NotGreaterTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2275"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotHumpDownHump", 
+            "description": "Bad named entity: NotHumpDownHump without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotHumpDownHump"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotHumpDownHump;", 
+            "description": "Named entity: NotHumpDownHump; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224e\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotHumpEqual", 
+            "description": "Bad named entity: NotHumpEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotHumpEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotHumpEqual;", 
+            "description": "Named entity: NotHumpEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224f\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLeftTriangle", 
+            "description": "Bad named entity: NotLeftTriangle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotLeftTriangle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLeftTriangle;", 
+            "description": "Named entity: NotLeftTriangle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ea"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLeftTriangleBar", 
+            "description": "Bad named entity: NotLeftTriangleBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotLeftTriangleBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLeftTriangleBar;", 
+            "description": "Named entity: NotLeftTriangleBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29cf\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLeftTriangleEqual", 
+            "description": "Bad named entity: NotLeftTriangleEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotLeftTriangleEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLeftTriangleEqual;", 
+            "description": "Named entity: NotLeftTriangleEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLess", 
+            "description": "Bad named entity: NotLess without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotLess"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLess;", 
+            "description": "Named entity: NotLess; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLessEqual", 
+            "description": "Bad named entity: NotLessEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotLessEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLessEqual;", 
+            "description": "Named entity: NotLessEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2270"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLessGreater", 
+            "description": "Bad named entity: NotLessGreater without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotLessGreater"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLessGreater;", 
+            "description": "Named entity: NotLessGreater; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2278"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLessLess", 
+            "description": "Bad named entity: NotLessLess without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotLessLess"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLessLess;", 
+            "description": "Named entity: NotLessLess; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226a\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLessSlantEqual", 
+            "description": "Bad named entity: NotLessSlantEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotLessSlantEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLessSlantEqual;", 
+            "description": "Named entity: NotLessSlantEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7d\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLessTilde", 
+            "description": "Bad named entity: NotLessTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotLessTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotLessTilde;", 
+            "description": "Named entity: NotLessTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2274"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotNestedGreaterGreater", 
+            "description": "Bad named entity: NotNestedGreaterGreater without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotNestedGreaterGreater"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotNestedGreaterGreater;", 
+            "description": "Named entity: NotNestedGreaterGreater; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa2\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotNestedLessLess", 
+            "description": "Bad named entity: NotNestedLessLess without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotNestedLessLess"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotNestedLessLess;", 
+            "description": "Named entity: NotNestedLessLess; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa1\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotPrecedes", 
+            "description": "Bad named entity: NotPrecedes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotPrecedes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotPrecedes;", 
+            "description": "Named entity: NotPrecedes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2280"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotPrecedesEqual", 
+            "description": "Bad named entity: NotPrecedesEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotPrecedesEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotPrecedesEqual;", 
+            "description": "Named entity: NotPrecedesEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aaf\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotPrecedesSlantEqual", 
+            "description": "Bad named entity: NotPrecedesSlantEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotPrecedesSlantEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotPrecedesSlantEqual;", 
+            "description": "Named entity: NotPrecedesSlantEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotReverseElement", 
+            "description": "Bad named entity: NotReverseElement without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotReverseElement"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotReverseElement;", 
+            "description": "Named entity: NotReverseElement; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u220c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotRightTriangle", 
+            "description": "Bad named entity: NotRightTriangle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotRightTriangle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotRightTriangle;", 
+            "description": "Named entity: NotRightTriangle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22eb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotRightTriangleBar", 
+            "description": "Bad named entity: NotRightTriangleBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotRightTriangleBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotRightTriangleBar;", 
+            "description": "Named entity: NotRightTriangleBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29d0\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotRightTriangleEqual", 
+            "description": "Bad named entity: NotRightTriangleEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotRightTriangleEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotRightTriangleEqual;", 
+            "description": "Named entity: NotRightTriangleEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSquareSubset", 
+            "description": "Bad named entity: NotSquareSubset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSquareSubset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSquareSubset;", 
+            "description": "Named entity: NotSquareSubset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228f\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSquareSubsetEqual", 
+            "description": "Bad named entity: NotSquareSubsetEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSquareSubsetEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSquareSubsetEqual;", 
+            "description": "Named entity: NotSquareSubsetEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSquareSuperset", 
+            "description": "Bad named entity: NotSquareSuperset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSquareSuperset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSquareSuperset;", 
+            "description": "Named entity: NotSquareSuperset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2290\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSquareSupersetEqual", 
+            "description": "Bad named entity: NotSquareSupersetEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSquareSupersetEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSquareSupersetEqual;", 
+            "description": "Named entity: NotSquareSupersetEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSubset", 
+            "description": "Bad named entity: NotSubset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSubset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSubset;", 
+            "description": "Named entity: NotSubset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2282\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSubsetEqual", 
+            "description": "Bad named entity: NotSubsetEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSubsetEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSubsetEqual;", 
+            "description": "Named entity: NotSubsetEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2288"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSucceeds", 
+            "description": "Bad named entity: NotSucceeds without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSucceeds"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSucceeds;", 
+            "description": "Named entity: NotSucceeds; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2281"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSucceedsEqual", 
+            "description": "Bad named entity: NotSucceedsEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSucceedsEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSucceedsEqual;", 
+            "description": "Named entity: NotSucceedsEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab0\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSucceedsSlantEqual", 
+            "description": "Bad named entity: NotSucceedsSlantEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSucceedsSlantEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSucceedsSlantEqual;", 
+            "description": "Named entity: NotSucceedsSlantEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSucceedsTilde", 
+            "description": "Bad named entity: NotSucceedsTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSucceedsTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSucceedsTilde;", 
+            "description": "Named entity: NotSucceedsTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227f\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSuperset", 
+            "description": "Bad named entity: NotSuperset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSuperset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSuperset;", 
+            "description": "Named entity: NotSuperset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2283\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSupersetEqual", 
+            "description": "Bad named entity: NotSupersetEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotSupersetEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotSupersetEqual;", 
+            "description": "Named entity: NotSupersetEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2289"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotTilde", 
+            "description": "Bad named entity: NotTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotTilde;", 
+            "description": "Named entity: NotTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2241"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotTildeEqual", 
+            "description": "Bad named entity: NotTildeEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotTildeEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotTildeEqual;", 
+            "description": "Named entity: NotTildeEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2244"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotTildeFullEqual", 
+            "description": "Bad named entity: NotTildeFullEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotTildeFullEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotTildeFullEqual;", 
+            "description": "Named entity: NotTildeFullEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2247"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotTildeTilde", 
+            "description": "Bad named entity: NotTildeTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotTildeTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotTildeTilde;", 
+            "description": "Named entity: NotTildeTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2249"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotVerticalBar", 
+            "description": "Bad named entity: NotVerticalBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&NotVerticalBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&NotVerticalBar;", 
+            "description": "Named entity: NotVerticalBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2224"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Nscr", 
+            "description": "Bad named entity: Nscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Nscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Nscr;", 
+            "description": "Named entity: Nscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udca9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ntilde", 
+            "description": "Named entity: Ntilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00d1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ntilde;", 
+            "description": "Named entity: Ntilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00d1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Nu", 
+            "description": "Bad named entity: Nu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Nu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Nu;", 
+            "description": "Named entity: Nu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u039d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OElig", 
+            "description": "Bad named entity: OElig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&OElig"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OElig;", 
+            "description": "Named entity: OElig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0152"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Oacute", 
+            "description": "Named entity: Oacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00d3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Oacute;", 
+            "description": "Named entity: Oacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00d3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ocirc", 
+            "description": "Named entity: Ocirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00d4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ocirc;", 
+            "description": "Named entity: Ocirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00d4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ocy", 
+            "description": "Bad named entity: Ocy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ocy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ocy;", 
+            "description": "Named entity: Ocy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u041e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Odblac", 
+            "description": "Bad named entity: Odblac without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Odblac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Odblac;", 
+            "description": "Named entity: Odblac; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0150"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ofr", 
+            "description": "Bad named entity: Ofr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ofr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ofr;", 
+            "description": "Named entity: Ofr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd12"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ograve", 
+            "description": "Named entity: Ograve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ograve;", 
+            "description": "Named entity: Ograve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Omacr", 
+            "description": "Bad named entity: Omacr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Omacr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Omacr;", 
+            "description": "Named entity: Omacr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u014c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Omega", 
+            "description": "Bad named entity: Omega without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Omega"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Omega;", 
+            "description": "Named entity: Omega; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03a9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Omicron", 
+            "description": "Bad named entity: Omicron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Omicron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Omicron;", 
+            "description": "Named entity: Omicron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u039f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Oopf", 
+            "description": "Bad named entity: Oopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Oopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Oopf;", 
+            "description": "Named entity: Oopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd46"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OpenCurlyDoubleQuote", 
+            "description": "Bad named entity: OpenCurlyDoubleQuote without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&OpenCurlyDoubleQuote"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OpenCurlyDoubleQuote;", 
+            "description": "Named entity: OpenCurlyDoubleQuote; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u201c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OpenCurlyQuote", 
+            "description": "Bad named entity: OpenCurlyQuote without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&OpenCurlyQuote"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OpenCurlyQuote;", 
+            "description": "Named entity: OpenCurlyQuote; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2018"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Or", 
+            "description": "Bad named entity: Or without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Or"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Or;", 
+            "description": "Named entity: Or; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a54"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Oscr", 
+            "description": "Bad named entity: Oscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Oscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Oscr;", 
+            "description": "Named entity: Oscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcaa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Oslash", 
+            "description": "Named entity: Oslash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00d8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Oslash;", 
+            "description": "Named entity: Oslash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00d8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Otilde", 
+            "description": "Named entity: Otilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00d5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Otilde;", 
+            "description": "Named entity: Otilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00d5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Otimes", 
+            "description": "Bad named entity: Otimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Otimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Otimes;", 
+            "description": "Named entity: Otimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a37"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ouml", 
+            "description": "Named entity: Ouml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00d6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ouml;", 
+            "description": "Named entity: Ouml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00d6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OverBar", 
+            "description": "Bad named entity: OverBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&OverBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OverBar;", 
+            "description": "Named entity: OverBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u203e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OverBrace", 
+            "description": "Bad named entity: OverBrace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&OverBrace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OverBrace;", 
+            "description": "Named entity: OverBrace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23de"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OverBracket", 
+            "description": "Bad named entity: OverBracket without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&OverBracket"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OverBracket;", 
+            "description": "Named entity: OverBracket; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OverParenthesis", 
+            "description": "Bad named entity: OverParenthesis without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&OverParenthesis"
+                ]
+            ]
+        }, 
+        {
+            "input": "&OverParenthesis;", 
+            "description": "Named entity: OverParenthesis; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23dc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&PartialD", 
+            "description": "Bad named entity: PartialD without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&PartialD"
+                ]
+            ]
+        }, 
+        {
+            "input": "&PartialD;", 
+            "description": "Named entity: PartialD; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2202"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Pcy", 
+            "description": "Bad named entity: Pcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Pcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Pcy;", 
+            "description": "Named entity: Pcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u041f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Pfr", 
+            "description": "Bad named entity: Pfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Pfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Pfr;", 
+            "description": "Named entity: Pfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd13"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Phi", 
+            "description": "Bad named entity: Phi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Phi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Phi;", 
+            "description": "Named entity: Phi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03a6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Pi", 
+            "description": "Bad named entity: Pi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Pi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Pi;", 
+            "description": "Named entity: Pi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03a0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&PlusMinus", 
+            "description": "Bad named entity: PlusMinus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&PlusMinus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&PlusMinus;", 
+            "description": "Named entity: PlusMinus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Poincareplane", 
+            "description": "Bad named entity: Poincareplane without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Poincareplane"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Poincareplane;", 
+            "description": "Named entity: Poincareplane; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Popf", 
+            "description": "Bad named entity: Popf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Popf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Popf;", 
+            "description": "Named entity: Popf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2119"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Pr", 
+            "description": "Bad named entity: Pr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Pr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Pr;", 
+            "description": "Named entity: Pr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2abb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Precedes", 
+            "description": "Bad named entity: Precedes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Precedes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Precedes;", 
+            "description": "Named entity: Precedes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&PrecedesEqual", 
+            "description": "Bad named entity: PrecedesEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&PrecedesEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&PrecedesEqual;", 
+            "description": "Named entity: PrecedesEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aaf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&PrecedesSlantEqual", 
+            "description": "Bad named entity: PrecedesSlantEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&PrecedesSlantEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&PrecedesSlantEqual;", 
+            "description": "Named entity: PrecedesSlantEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&PrecedesTilde", 
+            "description": "Bad named entity: PrecedesTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&PrecedesTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&PrecedesTilde;", 
+            "description": "Named entity: PrecedesTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Prime", 
+            "description": "Bad named entity: Prime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Prime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Prime;", 
+            "description": "Named entity: Prime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2033"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Product", 
+            "description": "Bad named entity: Product without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Product"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Product;", 
+            "description": "Named entity: Product; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u220f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Proportion", 
+            "description": "Bad named entity: Proportion without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Proportion"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Proportion;", 
+            "description": "Named entity: Proportion; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2237"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Proportional", 
+            "description": "Bad named entity: Proportional without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Proportional"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Proportional;", 
+            "description": "Named entity: Proportional; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u221d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Pscr", 
+            "description": "Bad named entity: Pscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Pscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Pscr;", 
+            "description": "Named entity: Pscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Psi", 
+            "description": "Bad named entity: Psi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Psi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Psi;", 
+            "description": "Named entity: Psi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03a8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&QUOT", 
+            "description": "Named entity: QUOT without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\""
+                ]
+            ]
+        }, 
+        {
+            "input": "&QUOT;", 
+            "description": "Named entity: QUOT; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\""
+                ]
+            ]
+        }, 
+        {
+            "input": "&Qfr", 
+            "description": "Bad named entity: Qfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Qfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Qfr;", 
+            "description": "Named entity: Qfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd14"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Qopf", 
+            "description": "Bad named entity: Qopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Qopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Qopf;", 
+            "description": "Named entity: Qopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Qscr", 
+            "description": "Bad named entity: Qscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Qscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Qscr;", 
+            "description": "Named entity: Qscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RBarr", 
+            "description": "Bad named entity: RBarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RBarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RBarr;", 
+            "description": "Named entity: RBarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2910"
+                ]
+            ]
+        }, 
+        {
+            "input": "&REG", 
+            "description": "Named entity: REG without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&REG;", 
+            "description": "Named entity: REG; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Racute", 
+            "description": "Bad named entity: Racute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Racute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Racute;", 
+            "description": "Named entity: Racute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0154"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rang", 
+            "description": "Bad named entity: Rang without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rang"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rang;", 
+            "description": "Named entity: Rang; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27eb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rarr", 
+            "description": "Bad named entity: Rarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rarr;", 
+            "description": "Named entity: Rarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rarrtl", 
+            "description": "Bad named entity: Rarrtl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rarrtl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rarrtl;", 
+            "description": "Named entity: Rarrtl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2916"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rcaron", 
+            "description": "Bad named entity: Rcaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rcaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rcaron;", 
+            "description": "Named entity: Rcaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0158"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rcedil", 
+            "description": "Bad named entity: Rcedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rcedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rcedil;", 
+            "description": "Named entity: Rcedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0156"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rcy", 
+            "description": "Bad named entity: Rcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rcy;", 
+            "description": "Named entity: Rcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0420"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Re", 
+            "description": "Bad named entity: Re without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Re"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Re;", 
+            "description": "Named entity: Re; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ReverseElement", 
+            "description": "Bad named entity: ReverseElement without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ReverseElement"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ReverseElement;", 
+            "description": "Named entity: ReverseElement; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u220b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ReverseEquilibrium", 
+            "description": "Bad named entity: ReverseEquilibrium without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ReverseEquilibrium"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ReverseEquilibrium;", 
+            "description": "Named entity: ReverseEquilibrium; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21cb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ReverseUpEquilibrium", 
+            "description": "Bad named entity: ReverseUpEquilibrium without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ReverseUpEquilibrium"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ReverseUpEquilibrium;", 
+            "description": "Named entity: ReverseUpEquilibrium; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u296f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rfr", 
+            "description": "Bad named entity: Rfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rfr;", 
+            "description": "Named entity: Rfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rho", 
+            "description": "Bad named entity: Rho without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rho"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rho;", 
+            "description": "Named entity: Rho; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03a1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightAngleBracket", 
+            "description": "Bad named entity: RightAngleBracket without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightAngleBracket"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightAngleBracket;", 
+            "description": "Named entity: RightAngleBracket; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27e9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightArrow", 
+            "description": "Bad named entity: RightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightArrow;", 
+            "description": "Named entity: RightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2192"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightArrowBar", 
+            "description": "Bad named entity: RightArrowBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightArrowBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightArrowBar;", 
+            "description": "Named entity: RightArrowBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21e5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightArrowLeftArrow", 
+            "description": "Bad named entity: RightArrowLeftArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightArrowLeftArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightArrowLeftArrow;", 
+            "description": "Named entity: RightArrowLeftArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightCeiling", 
+            "description": "Bad named entity: RightCeiling without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightCeiling"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightCeiling;", 
+            "description": "Named entity: RightCeiling; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2309"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightDoubleBracket", 
+            "description": "Bad named entity: RightDoubleBracket without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightDoubleBracket"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightDoubleBracket;", 
+            "description": "Named entity: RightDoubleBracket; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27e7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightDownTeeVector", 
+            "description": "Bad named entity: RightDownTeeVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightDownTeeVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightDownTeeVector;", 
+            "description": "Named entity: RightDownTeeVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u295d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightDownVector", 
+            "description": "Bad named entity: RightDownVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightDownVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightDownVector;", 
+            "description": "Named entity: RightDownVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightDownVectorBar", 
+            "description": "Bad named entity: RightDownVectorBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightDownVectorBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightDownVectorBar;", 
+            "description": "Named entity: RightDownVectorBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2955"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightFloor", 
+            "description": "Bad named entity: RightFloor without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightFloor"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightFloor;", 
+            "description": "Named entity: RightFloor; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u230b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTee", 
+            "description": "Bad named entity: RightTee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightTee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTee;", 
+            "description": "Named entity: RightTee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTeeArrow", 
+            "description": "Bad named entity: RightTeeArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightTeeArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTeeArrow;", 
+            "description": "Named entity: RightTeeArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTeeVector", 
+            "description": "Bad named entity: RightTeeVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightTeeVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTeeVector;", 
+            "description": "Named entity: RightTeeVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u295b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTriangle", 
+            "description": "Bad named entity: RightTriangle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightTriangle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTriangle;", 
+            "description": "Named entity: RightTriangle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTriangleBar", 
+            "description": "Bad named entity: RightTriangleBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightTriangleBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTriangleBar;", 
+            "description": "Named entity: RightTriangleBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29d0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTriangleEqual", 
+            "description": "Bad named entity: RightTriangleEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightTriangleEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightTriangleEqual;", 
+            "description": "Named entity: RightTriangleEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightUpDownVector", 
+            "description": "Bad named entity: RightUpDownVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightUpDownVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightUpDownVector;", 
+            "description": "Named entity: RightUpDownVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u294f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightUpTeeVector", 
+            "description": "Bad named entity: RightUpTeeVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightUpTeeVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightUpTeeVector;", 
+            "description": "Named entity: RightUpTeeVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u295c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightUpVector", 
+            "description": "Bad named entity: RightUpVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightUpVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightUpVector;", 
+            "description": "Named entity: RightUpVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21be"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightUpVectorBar", 
+            "description": "Bad named entity: RightUpVectorBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightUpVectorBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightUpVectorBar;", 
+            "description": "Named entity: RightUpVectorBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2954"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightVector", 
+            "description": "Bad named entity: RightVector without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightVector"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightVector;", 
+            "description": "Named entity: RightVector; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightVectorBar", 
+            "description": "Bad named entity: RightVectorBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RightVectorBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RightVectorBar;", 
+            "description": "Named entity: RightVectorBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2953"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rightarrow", 
+            "description": "Bad named entity: Rightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rightarrow;", 
+            "description": "Named entity: Rightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ropf", 
+            "description": "Bad named entity: Ropf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ropf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ropf;", 
+            "description": "Named entity: Ropf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RoundImplies", 
+            "description": "Bad named entity: RoundImplies without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RoundImplies"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RoundImplies;", 
+            "description": "Named entity: RoundImplies; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2970"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rrightarrow", 
+            "description": "Bad named entity: Rrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rrightarrow;", 
+            "description": "Named entity: Rrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21db"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rscr", 
+            "description": "Bad named entity: Rscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rscr;", 
+            "description": "Named entity: Rscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rsh", 
+            "description": "Bad named entity: Rsh without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Rsh"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Rsh;", 
+            "description": "Named entity: Rsh; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RuleDelayed", 
+            "description": "Bad named entity: RuleDelayed without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&RuleDelayed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&RuleDelayed;", 
+            "description": "Named entity: RuleDelayed; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29f4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SHCHcy", 
+            "description": "Bad named entity: SHCHcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SHCHcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SHCHcy;", 
+            "description": "Named entity: SHCHcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0429"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SHcy", 
+            "description": "Bad named entity: SHcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SHcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SHcy;", 
+            "description": "Named entity: SHcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0428"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SOFTcy", 
+            "description": "Bad named entity: SOFTcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SOFTcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SOFTcy;", 
+            "description": "Named entity: SOFTcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u042c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sacute", 
+            "description": "Bad named entity: Sacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Sacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sacute;", 
+            "description": "Named entity: Sacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u015a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sc", 
+            "description": "Bad named entity: Sc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Sc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sc;", 
+            "description": "Named entity: Sc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2abc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Scaron", 
+            "description": "Bad named entity: Scaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Scaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Scaron;", 
+            "description": "Named entity: Scaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0160"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Scedil", 
+            "description": "Bad named entity: Scedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Scedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Scedil;", 
+            "description": "Named entity: Scedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u015e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Scirc", 
+            "description": "Bad named entity: Scirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Scirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Scirc;", 
+            "description": "Named entity: Scirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u015c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Scy", 
+            "description": "Bad named entity: Scy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Scy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Scy;", 
+            "description": "Named entity: Scy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0421"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sfr", 
+            "description": "Bad named entity: Sfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Sfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sfr;", 
+            "description": "Named entity: Sfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd16"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ShortDownArrow", 
+            "description": "Bad named entity: ShortDownArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ShortDownArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ShortDownArrow;", 
+            "description": "Named entity: ShortDownArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2193"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ShortLeftArrow", 
+            "description": "Bad named entity: ShortLeftArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ShortLeftArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ShortLeftArrow;", 
+            "description": "Named entity: ShortLeftArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2190"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ShortRightArrow", 
+            "description": "Bad named entity: ShortRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ShortRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ShortRightArrow;", 
+            "description": "Named entity: ShortRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2192"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ShortUpArrow", 
+            "description": "Bad named entity: ShortUpArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ShortUpArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ShortUpArrow;", 
+            "description": "Named entity: ShortUpArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2191"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sigma", 
+            "description": "Bad named entity: Sigma without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Sigma"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sigma;", 
+            "description": "Named entity: Sigma; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03a3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SmallCircle", 
+            "description": "Bad named entity: SmallCircle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SmallCircle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SmallCircle;", 
+            "description": "Named entity: SmallCircle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2218"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sopf", 
+            "description": "Bad named entity: Sopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Sopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sopf;", 
+            "description": "Named entity: Sopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd4a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sqrt", 
+            "description": "Bad named entity: Sqrt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Sqrt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sqrt;", 
+            "description": "Named entity: Sqrt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u221a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Square", 
+            "description": "Bad named entity: Square without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Square"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Square;", 
+            "description": "Named entity: Square; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25a1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareIntersection", 
+            "description": "Bad named entity: SquareIntersection without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SquareIntersection"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareIntersection;", 
+            "description": "Named entity: SquareIntersection; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2293"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareSubset", 
+            "description": "Bad named entity: SquareSubset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SquareSubset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareSubset;", 
+            "description": "Named entity: SquareSubset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareSubsetEqual", 
+            "description": "Bad named entity: SquareSubsetEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SquareSubsetEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareSubsetEqual;", 
+            "description": "Named entity: SquareSubsetEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2291"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareSuperset", 
+            "description": "Bad named entity: SquareSuperset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SquareSuperset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareSuperset;", 
+            "description": "Named entity: SquareSuperset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2290"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareSupersetEqual", 
+            "description": "Bad named entity: SquareSupersetEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SquareSupersetEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareSupersetEqual;", 
+            "description": "Named entity: SquareSupersetEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2292"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareUnion", 
+            "description": "Bad named entity: SquareUnion without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SquareUnion"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SquareUnion;", 
+            "description": "Named entity: SquareUnion; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2294"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sscr", 
+            "description": "Bad named entity: Sscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Sscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sscr;", 
+            "description": "Named entity: Sscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Star", 
+            "description": "Bad named entity: Star without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Star"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Star;", 
+            "description": "Named entity: Star; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sub", 
+            "description": "Bad named entity: Sub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Sub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sub;", 
+            "description": "Named entity: Sub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Subset", 
+            "description": "Bad named entity: Subset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Subset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Subset;", 
+            "description": "Named entity: Subset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SubsetEqual", 
+            "description": "Bad named entity: SubsetEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SubsetEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SubsetEqual;", 
+            "description": "Named entity: SubsetEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2286"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Succeeds", 
+            "description": "Bad named entity: Succeeds without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Succeeds"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Succeeds;", 
+            "description": "Named entity: Succeeds; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SucceedsEqual", 
+            "description": "Bad named entity: SucceedsEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SucceedsEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SucceedsEqual;", 
+            "description": "Named entity: SucceedsEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SucceedsSlantEqual", 
+            "description": "Bad named entity: SucceedsSlantEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SucceedsSlantEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SucceedsSlantEqual;", 
+            "description": "Named entity: SucceedsSlantEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SucceedsTilde", 
+            "description": "Bad named entity: SucceedsTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SucceedsTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SucceedsTilde;", 
+            "description": "Named entity: SucceedsTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SuchThat", 
+            "description": "Bad named entity: SuchThat without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SuchThat"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SuchThat;", 
+            "description": "Named entity: SuchThat; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u220b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sum", 
+            "description": "Bad named entity: Sum without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Sum"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sum;", 
+            "description": "Named entity: Sum; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2211"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sup", 
+            "description": "Bad named entity: Sup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Sup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Sup;", 
+            "description": "Named entity: Sup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Superset", 
+            "description": "Bad named entity: Superset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Superset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Superset;", 
+            "description": "Named entity: Superset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2283"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SupersetEqual", 
+            "description": "Bad named entity: SupersetEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&SupersetEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&SupersetEqual;", 
+            "description": "Named entity: SupersetEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2287"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Supset", 
+            "description": "Bad named entity: Supset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Supset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Supset;", 
+            "description": "Named entity: Supset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&THORN", 
+            "description": "Named entity: THORN without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00de"
+                ]
+            ]
+        }, 
+        {
+            "input": "&THORN;", 
+            "description": "Named entity: THORN; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00de"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TRADE", 
+            "description": "Bad named entity: TRADE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&TRADE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TRADE;", 
+            "description": "Named entity: TRADE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2122"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TSHcy", 
+            "description": "Bad named entity: TSHcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&TSHcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TSHcy;", 
+            "description": "Named entity: TSHcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u040b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TScy", 
+            "description": "Bad named entity: TScy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&TScy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TScy;", 
+            "description": "Named entity: TScy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0426"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tab", 
+            "description": "Bad named entity: Tab without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Tab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tab;", 
+            "description": "Named entity: Tab; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\t"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tau", 
+            "description": "Bad named entity: Tau without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Tau"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tau;", 
+            "description": "Named entity: Tau; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03a4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tcaron", 
+            "description": "Bad named entity: Tcaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Tcaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tcaron;", 
+            "description": "Named entity: Tcaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0164"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tcedil", 
+            "description": "Bad named entity: Tcedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Tcedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tcedil;", 
+            "description": "Named entity: Tcedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0162"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tcy", 
+            "description": "Bad named entity: Tcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Tcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tcy;", 
+            "description": "Named entity: Tcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0422"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tfr", 
+            "description": "Bad named entity: Tfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Tfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tfr;", 
+            "description": "Named entity: Tfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd17"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Therefore", 
+            "description": "Bad named entity: Therefore without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Therefore"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Therefore;", 
+            "description": "Named entity: Therefore; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2234"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Theta", 
+            "description": "Bad named entity: Theta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Theta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Theta;", 
+            "description": "Named entity: Theta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0398"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ThickSpace", 
+            "description": "Bad named entity: ThickSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ThickSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ThickSpace;", 
+            "description": "Named entity: ThickSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u205f\u200a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ThinSpace", 
+            "description": "Bad named entity: ThinSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ThinSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ThinSpace;", 
+            "description": "Named entity: ThinSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2009"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tilde", 
+            "description": "Bad named entity: Tilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Tilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tilde;", 
+            "description": "Named entity: Tilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TildeEqual", 
+            "description": "Bad named entity: TildeEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&TildeEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TildeEqual;", 
+            "description": "Named entity: TildeEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2243"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TildeFullEqual", 
+            "description": "Bad named entity: TildeFullEqual without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&TildeFullEqual"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TildeFullEqual;", 
+            "description": "Named entity: TildeFullEqual; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2245"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TildeTilde", 
+            "description": "Bad named entity: TildeTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&TildeTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TildeTilde;", 
+            "description": "Named entity: TildeTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2248"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Topf", 
+            "description": "Bad named entity: Topf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Topf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Topf;", 
+            "description": "Named entity: Topf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd4b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TripleDot", 
+            "description": "Bad named entity: TripleDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&TripleDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&TripleDot;", 
+            "description": "Named entity: TripleDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u20db"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tscr", 
+            "description": "Bad named entity: Tscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Tscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tscr;", 
+            "description": "Named entity: Tscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcaf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tstrok", 
+            "description": "Bad named entity: Tstrok without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Tstrok"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Tstrok;", 
+            "description": "Named entity: Tstrok; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0166"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uacute", 
+            "description": "Named entity: Uacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00da"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uacute;", 
+            "description": "Named entity: Uacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00da"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uarr", 
+            "description": "Bad named entity: Uarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Uarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uarr;", 
+            "description": "Named entity: Uarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u219f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uarrocir", 
+            "description": "Bad named entity: Uarrocir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Uarrocir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uarrocir;", 
+            "description": "Named entity: Uarrocir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2949"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ubrcy", 
+            "description": "Bad named entity: Ubrcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ubrcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ubrcy;", 
+            "description": "Named entity: Ubrcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u040e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ubreve", 
+            "description": "Bad named entity: Ubreve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ubreve"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ubreve;", 
+            "description": "Named entity: Ubreve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u016c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ucirc", 
+            "description": "Named entity: Ucirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00db"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ucirc;", 
+            "description": "Named entity: Ucirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00db"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ucy", 
+            "description": "Bad named entity: Ucy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ucy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ucy;", 
+            "description": "Named entity: Ucy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0423"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Udblac", 
+            "description": "Bad named entity: Udblac without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Udblac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Udblac;", 
+            "description": "Named entity: Udblac; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0170"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ufr", 
+            "description": "Bad named entity: Ufr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ufr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ufr;", 
+            "description": "Named entity: Ufr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd18"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ugrave", 
+            "description": "Named entity: Ugrave without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00d9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ugrave;", 
+            "description": "Named entity: Ugrave; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00d9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Umacr", 
+            "description": "Bad named entity: Umacr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Umacr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Umacr;", 
+            "description": "Named entity: Umacr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u016a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UnderBar", 
+            "description": "Bad named entity: UnderBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UnderBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UnderBar;", 
+            "description": "Named entity: UnderBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "_"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UnderBrace", 
+            "description": "Bad named entity: UnderBrace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UnderBrace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UnderBrace;", 
+            "description": "Named entity: UnderBrace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23df"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UnderBracket", 
+            "description": "Bad named entity: UnderBracket without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UnderBracket"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UnderBracket;", 
+            "description": "Named entity: UnderBracket; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UnderParenthesis", 
+            "description": "Bad named entity: UnderParenthesis without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UnderParenthesis"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UnderParenthesis;", 
+            "description": "Named entity: UnderParenthesis; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23dd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Union", 
+            "description": "Bad named entity: Union without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Union"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Union;", 
+            "description": "Named entity: Union; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UnionPlus", 
+            "description": "Bad named entity: UnionPlus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UnionPlus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UnionPlus;", 
+            "description": "Named entity: UnionPlus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uogon", 
+            "description": "Bad named entity: Uogon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Uogon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uogon;", 
+            "description": "Named entity: Uogon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0172"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uopf", 
+            "description": "Bad named entity: Uopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Uopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uopf;", 
+            "description": "Named entity: Uopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd4c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpArrow", 
+            "description": "Bad named entity: UpArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UpArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpArrow;", 
+            "description": "Named entity: UpArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2191"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpArrowBar", 
+            "description": "Bad named entity: UpArrowBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UpArrowBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpArrowBar;", 
+            "description": "Named entity: UpArrowBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2912"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpArrowDownArrow", 
+            "description": "Bad named entity: UpArrowDownArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UpArrowDownArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpArrowDownArrow;", 
+            "description": "Named entity: UpArrowDownArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpDownArrow", 
+            "description": "Bad named entity: UpDownArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UpDownArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpDownArrow;", 
+            "description": "Named entity: UpDownArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2195"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpEquilibrium", 
+            "description": "Bad named entity: UpEquilibrium without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UpEquilibrium"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpEquilibrium;", 
+            "description": "Named entity: UpEquilibrium; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u296e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpTee", 
+            "description": "Bad named entity: UpTee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UpTee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpTee;", 
+            "description": "Named entity: UpTee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpTeeArrow", 
+            "description": "Bad named entity: UpTeeArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UpTeeArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpTeeArrow;", 
+            "description": "Named entity: UpTeeArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uparrow", 
+            "description": "Bad named entity: Uparrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Uparrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uparrow;", 
+            "description": "Named entity: Uparrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Updownarrow", 
+            "description": "Bad named entity: Updownarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Updownarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Updownarrow;", 
+            "description": "Named entity: Updownarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpperLeftArrow", 
+            "description": "Bad named entity: UpperLeftArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UpperLeftArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpperLeftArrow;", 
+            "description": "Named entity: UpperLeftArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2196"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpperRightArrow", 
+            "description": "Bad named entity: UpperRightArrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&UpperRightArrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&UpperRightArrow;", 
+            "description": "Named entity: UpperRightArrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2197"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Upsi", 
+            "description": "Bad named entity: Upsi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Upsi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Upsi;", 
+            "description": "Named entity: Upsi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Upsilon", 
+            "description": "Bad named entity: Upsilon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Upsilon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Upsilon;", 
+            "description": "Named entity: Upsilon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03a5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uring", 
+            "description": "Bad named entity: Uring without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Uring"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uring;", 
+            "description": "Named entity: Uring; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u016e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uscr", 
+            "description": "Bad named entity: Uscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Uscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uscr;", 
+            "description": "Named entity: Uscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcb0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Utilde", 
+            "description": "Bad named entity: Utilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Utilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Utilde;", 
+            "description": "Named entity: Utilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0168"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uuml", 
+            "description": "Named entity: Uuml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00dc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Uuml;", 
+            "description": "Named entity: Uuml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00dc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VDash", 
+            "description": "Bad named entity: VDash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&VDash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VDash;", 
+            "description": "Named entity: VDash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vbar", 
+            "description": "Bad named entity: Vbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Vbar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vbar;", 
+            "description": "Named entity: Vbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aeb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vcy", 
+            "description": "Bad named entity: Vcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Vcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vcy;", 
+            "description": "Named entity: Vcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0412"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vdash", 
+            "description": "Bad named entity: Vdash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Vdash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vdash;", 
+            "description": "Named entity: Vdash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vdashl", 
+            "description": "Bad named entity: Vdashl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Vdashl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vdashl;", 
+            "description": "Named entity: Vdashl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ae6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vee", 
+            "description": "Bad named entity: Vee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Vee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vee;", 
+            "description": "Named entity: Vee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Verbar", 
+            "description": "Bad named entity: Verbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Verbar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Verbar;", 
+            "description": "Named entity: Verbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2016"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vert", 
+            "description": "Bad named entity: Vert without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Vert"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vert;", 
+            "description": "Named entity: Vert; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2016"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VerticalBar", 
+            "description": "Bad named entity: VerticalBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&VerticalBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VerticalBar;", 
+            "description": "Named entity: VerticalBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2223"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VerticalLine", 
+            "description": "Bad named entity: VerticalLine without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&VerticalLine"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VerticalLine;", 
+            "description": "Named entity: VerticalLine; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "|"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VerticalSeparator", 
+            "description": "Bad named entity: VerticalSeparator without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&VerticalSeparator"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VerticalSeparator;", 
+            "description": "Named entity: VerticalSeparator; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2758"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VerticalTilde", 
+            "description": "Bad named entity: VerticalTilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&VerticalTilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VerticalTilde;", 
+            "description": "Named entity: VerticalTilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2240"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VeryThinSpace", 
+            "description": "Bad named entity: VeryThinSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&VeryThinSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&VeryThinSpace;", 
+            "description": "Named entity: VeryThinSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vfr", 
+            "description": "Bad named entity: Vfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Vfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vfr;", 
+            "description": "Named entity: Vfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd19"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vopf", 
+            "description": "Bad named entity: Vopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Vopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vopf;", 
+            "description": "Named entity: Vopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd4d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vscr", 
+            "description": "Bad named entity: Vscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Vscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vscr;", 
+            "description": "Named entity: Vscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcb1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vvdash", 
+            "description": "Bad named entity: Vvdash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Vvdash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Vvdash;", 
+            "description": "Named entity: Vvdash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22aa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Wcirc", 
+            "description": "Bad named entity: Wcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Wcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Wcirc;", 
+            "description": "Named entity: Wcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0174"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Wedge", 
+            "description": "Bad named entity: Wedge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Wedge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Wedge;", 
+            "description": "Named entity: Wedge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Wfr", 
+            "description": "Bad named entity: Wfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Wfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Wfr;", 
+            "description": "Named entity: Wfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd1a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Wopf", 
+            "description": "Bad named entity: Wopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Wopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Wopf;", 
+            "description": "Named entity: Wopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd4e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Wscr", 
+            "description": "Bad named entity: Wscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Wscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Wscr;", 
+            "description": "Named entity: Wscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcb2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Xfr", 
+            "description": "Bad named entity: Xfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Xfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Xfr;", 
+            "description": "Named entity: Xfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd1b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Xi", 
+            "description": "Bad named entity: Xi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Xi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Xi;", 
+            "description": "Named entity: Xi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u039e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Xopf", 
+            "description": "Bad named entity: Xopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Xopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Xopf;", 
+            "description": "Named entity: Xopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd4f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Xscr", 
+            "description": "Bad named entity: Xscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Xscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Xscr;", 
+            "description": "Named entity: Xscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcb3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&YAcy", 
+            "description": "Bad named entity: YAcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&YAcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&YAcy;", 
+            "description": "Named entity: YAcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u042f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&YIcy", 
+            "description": "Bad named entity: YIcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&YIcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&YIcy;", 
+            "description": "Named entity: YIcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0407"
+                ]
+            ]
+        }, 
+        {
+            "input": "&YUcy", 
+            "description": "Bad named entity: YUcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&YUcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&YUcy;", 
+            "description": "Named entity: YUcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u042e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Yacute", 
+            "description": "Named entity: Yacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00dd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Yacute;", 
+            "description": "Named entity: Yacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00dd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ycirc", 
+            "description": "Bad named entity: Ycirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ycirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ycirc;", 
+            "description": "Named entity: Ycirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0176"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ycy", 
+            "description": "Bad named entity: Ycy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Ycy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Ycy;", 
+            "description": "Named entity: Ycy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u042b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Yfr", 
+            "description": "Bad named entity: Yfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Yfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Yfr;", 
+            "description": "Named entity: Yfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd1c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Yopf", 
+            "description": "Bad named entity: Yopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Yopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Yopf;", 
+            "description": "Named entity: Yopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd50"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Yscr", 
+            "description": "Bad named entity: Yscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Yscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Yscr;", 
+            "description": "Named entity: Yscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcb4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Yuml", 
+            "description": "Bad named entity: Yuml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Yuml"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Yuml;", 
+            "description": "Named entity: Yuml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0178"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ZHcy", 
+            "description": "Bad named entity: ZHcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ZHcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ZHcy;", 
+            "description": "Named entity: ZHcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0416"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zacute", 
+            "description": "Bad named entity: Zacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Zacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zacute;", 
+            "description": "Named entity: Zacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0179"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zcaron", 
+            "description": "Bad named entity: Zcaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Zcaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zcaron;", 
+            "description": "Named entity: Zcaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u017d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zcy", 
+            "description": "Bad named entity: Zcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Zcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zcy;", 
+            "description": "Named entity: Zcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0417"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zdot", 
+            "description": "Bad named entity: Zdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Zdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zdot;", 
+            "description": "Named entity: Zdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u017b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ZeroWidthSpace", 
+            "description": "Bad named entity: ZeroWidthSpace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ZeroWidthSpace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ZeroWidthSpace;", 
+            "description": "Named entity: ZeroWidthSpace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zeta", 
+            "description": "Bad named entity: Zeta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Zeta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zeta;", 
+            "description": "Named entity: Zeta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0396"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zfr", 
+            "description": "Bad named entity: Zfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Zfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zfr;", 
+            "description": "Named entity: Zfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2128"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zopf", 
+            "description": "Bad named entity: Zopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Zopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zopf;", 
+            "description": "Named entity: Zopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2124"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zscr", 
+            "description": "Bad named entity: Zscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&Zscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&Zscr;", 
+            "description": "Named entity: Zscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcb5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aacute", 
+            "description": "Named entity: aacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00e1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aacute;", 
+            "description": "Named entity: aacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00e1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&abreve", 
+            "description": "Bad named entity: abreve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&abreve"
+                ]
+            ]
+        }, 
+        {
+            "input": "&abreve;", 
+            "description": "Named entity: abreve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0103"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ac", 
+            "description": "Bad named entity: ac without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ac;", 
+            "description": "Named entity: ac; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&acE", 
+            "description": "Bad named entity: acE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&acE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&acE;", 
+            "description": "Named entity: acE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223e\u0333"
+                ]
+            ]
+        }, 
+        {
+            "input": "&acd", 
+            "description": "Bad named entity: acd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&acd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&acd;", 
+            "description": "Named entity: acd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&acirc", 
+            "description": "Named entity: acirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00e2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&acirc;", 
+            "description": "Named entity: acirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00e2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&acute", 
+            "description": "Named entity: acute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&acute;", 
+            "description": "Named entity: acute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&acy", 
+            "description": "Bad named entity: acy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&acy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&acy;", 
+            "description": "Named entity: acy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0430"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aelig", 
+            "description": "Named entity: aelig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00e6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aelig;", 
+            "description": "Named entity: aelig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00e6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&af", 
+            "description": "Bad named entity: af without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&af"
+                ]
+            ]
+        }, 
+        {
+            "input": "&af;", 
+            "description": "Named entity: af; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2061"
+                ]
+            ]
+        }, 
+        {
+            "input": "&afr", 
+            "description": "Bad named entity: afr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&afr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&afr;", 
+            "description": "Named entity: afr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd1e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&agrave", 
+            "description": "Named entity: agrave without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00e0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&agrave;", 
+            "description": "Named entity: agrave; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00e0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&alefsym", 
+            "description": "Bad named entity: alefsym without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&alefsym"
+                ]
+            ]
+        }, 
+        {
+            "input": "&alefsym;", 
+            "description": "Named entity: alefsym; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2135"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aleph", 
+            "description": "Bad named entity: aleph without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&aleph"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aleph;", 
+            "description": "Named entity: aleph; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2135"
+                ]
+            ]
+        }, 
+        {
+            "input": "&alpha", 
+            "description": "Bad named entity: alpha without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&alpha"
+                ]
+            ]
+        }, 
+        {
+            "input": "&alpha;", 
+            "description": "Named entity: alpha; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&amacr", 
+            "description": "Bad named entity: amacr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&amacr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&amacr;", 
+            "description": "Named entity: amacr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0101"
+                ]
+            ]
+        }, 
+        {
+            "input": "&amalg", 
+            "description": "Bad named entity: amalg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&amalg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&amalg;", 
+            "description": "Named entity: amalg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a3f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&amp", 
+            "description": "Named entity: amp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&"
+                ]
+            ]
+        }, 
+        {
+            "input": "&amp;", 
+            "description": "Named entity: amp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "&"
+                ]
+            ]
+        }, 
+        {
+            "input": "&and", 
+            "description": "Bad named entity: and without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&and"
+                ]
+            ]
+        }, 
+        {
+            "input": "&and;", 
+            "description": "Named entity: and; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2227"
+                ]
+            ]
+        }, 
+        {
+            "input": "&andand", 
+            "description": "Bad named entity: andand without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&andand"
+                ]
+            ]
+        }, 
+        {
+            "input": "&andand;", 
+            "description": "Named entity: andand; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a55"
+                ]
+            ]
+        }, 
+        {
+            "input": "&andd", 
+            "description": "Bad named entity: andd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&andd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&andd;", 
+            "description": "Named entity: andd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a5c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&andslope", 
+            "description": "Bad named entity: andslope without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&andslope"
+                ]
+            ]
+        }, 
+        {
+            "input": "&andslope;", 
+            "description": "Named entity: andslope; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a58"
+                ]
+            ]
+        }, 
+        {
+            "input": "&andv", 
+            "description": "Bad named entity: andv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&andv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&andv;", 
+            "description": "Named entity: andv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a5a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ang", 
+            "description": "Bad named entity: ang without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ang"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ang;", 
+            "description": "Named entity: ang; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2220"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ange", 
+            "description": "Bad named entity: ange without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ange"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ange;", 
+            "description": "Named entity: ange; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29a4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angle", 
+            "description": "Bad named entity: angle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angle;", 
+            "description": "Named entity: angle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2220"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsd", 
+            "description": "Bad named entity: angmsd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angmsd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsd;", 
+            "description": "Named entity: angmsd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2221"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdaa", 
+            "description": "Bad named entity: angmsdaa without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angmsdaa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdaa;", 
+            "description": "Named entity: angmsdaa; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29a8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdab", 
+            "description": "Bad named entity: angmsdab without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angmsdab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdab;", 
+            "description": "Named entity: angmsdab; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29a9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdac", 
+            "description": "Bad named entity: angmsdac without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angmsdac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdac;", 
+            "description": "Named entity: angmsdac; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29aa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdad", 
+            "description": "Bad named entity: angmsdad without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angmsdad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdad;", 
+            "description": "Named entity: angmsdad; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29ab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdae", 
+            "description": "Bad named entity: angmsdae without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angmsdae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdae;", 
+            "description": "Named entity: angmsdae; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29ac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdaf", 
+            "description": "Bad named entity: angmsdaf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angmsdaf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdaf;", 
+            "description": "Named entity: angmsdaf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29ad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdag", 
+            "description": "Bad named entity: angmsdag without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angmsdag"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdag;", 
+            "description": "Named entity: angmsdag; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29ae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdah", 
+            "description": "Bad named entity: angmsdah without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angmsdah"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angmsdah;", 
+            "description": "Named entity: angmsdah; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29af"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angrt", 
+            "description": "Bad named entity: angrt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angrt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angrt;", 
+            "description": "Named entity: angrt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u221f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angrtvb", 
+            "description": "Bad named entity: angrtvb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angrtvb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angrtvb;", 
+            "description": "Named entity: angrtvb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22be"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angrtvbd", 
+            "description": "Bad named entity: angrtvbd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angrtvbd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angrtvbd;", 
+            "description": "Named entity: angrtvbd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u299d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angsph", 
+            "description": "Bad named entity: angsph without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angsph"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angsph;", 
+            "description": "Named entity: angsph; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2222"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angst", 
+            "description": "Bad named entity: angst without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angst"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angst;", 
+            "description": "Named entity: angst; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00c5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angzarr", 
+            "description": "Bad named entity: angzarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&angzarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&angzarr;", 
+            "description": "Named entity: angzarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u237c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aogon", 
+            "description": "Bad named entity: aogon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&aogon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aogon;", 
+            "description": "Named entity: aogon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0105"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aopf", 
+            "description": "Bad named entity: aopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&aopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aopf;", 
+            "description": "Named entity: aopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd52"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ap", 
+            "description": "Bad named entity: ap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ap;", 
+            "description": "Named entity: ap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2248"
+                ]
+            ]
+        }, 
+        {
+            "input": "&apE", 
+            "description": "Bad named entity: apE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&apE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&apE;", 
+            "description": "Named entity: apE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a70"
+                ]
+            ]
+        }, 
+        {
+            "input": "&apacir", 
+            "description": "Bad named entity: apacir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&apacir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&apacir;", 
+            "description": "Named entity: apacir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a6f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ape", 
+            "description": "Bad named entity: ape without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ape"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ape;", 
+            "description": "Named entity: ape; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&apid", 
+            "description": "Bad named entity: apid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&apid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&apid;", 
+            "description": "Named entity: apid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&apos", 
+            "description": "Bad named entity: apos without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&apos"
+                ]
+            ]
+        }, 
+        {
+            "input": "&apos;", 
+            "description": "Named entity: apos; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "'"
+                ]
+            ]
+        }, 
+        {
+            "input": "&approx", 
+            "description": "Bad named entity: approx without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&approx"
+                ]
+            ]
+        }, 
+        {
+            "input": "&approx;", 
+            "description": "Named entity: approx; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2248"
+                ]
+            ]
+        }, 
+        {
+            "input": "&approxeq", 
+            "description": "Bad named entity: approxeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&approxeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&approxeq;", 
+            "description": "Named entity: approxeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aring", 
+            "description": "Named entity: aring without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00e5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&aring;", 
+            "description": "Named entity: aring; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00e5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ascr", 
+            "description": "Bad named entity: ascr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ascr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ascr;", 
+            "description": "Named entity: ascr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcb6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ast", 
+            "description": "Bad named entity: ast without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ast"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ast;", 
+            "description": "Named entity: ast; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "*"
+                ]
+            ]
+        }, 
+        {
+            "input": "&asymp", 
+            "description": "Bad named entity: asymp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&asymp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&asymp;", 
+            "description": "Named entity: asymp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2248"
+                ]
+            ]
+        }, 
+        {
+            "input": "&asympeq", 
+            "description": "Bad named entity: asympeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&asympeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&asympeq;", 
+            "description": "Named entity: asympeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&atilde", 
+            "description": "Named entity: atilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00e3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&atilde;", 
+            "description": "Named entity: atilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00e3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&auml", 
+            "description": "Named entity: auml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00e4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&auml;", 
+            "description": "Named entity: auml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00e4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&awconint", 
+            "description": "Bad named entity: awconint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&awconint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&awconint;", 
+            "description": "Named entity: awconint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2233"
+                ]
+            ]
+        }, 
+        {
+            "input": "&awint", 
+            "description": "Bad named entity: awint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&awint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&awint;", 
+            "description": "Named entity: awint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a11"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bNot", 
+            "description": "Bad named entity: bNot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bNot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bNot;", 
+            "description": "Named entity: bNot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&backcong", 
+            "description": "Bad named entity: backcong without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&backcong"
+                ]
+            ]
+        }, 
+        {
+            "input": "&backcong;", 
+            "description": "Named entity: backcong; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&backepsilon", 
+            "description": "Bad named entity: backepsilon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&backepsilon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&backepsilon;", 
+            "description": "Named entity: backepsilon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03f6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&backprime", 
+            "description": "Bad named entity: backprime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&backprime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&backprime;", 
+            "description": "Named entity: backprime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2035"
+                ]
+            ]
+        }, 
+        {
+            "input": "&backsim", 
+            "description": "Bad named entity: backsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&backsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&backsim;", 
+            "description": "Named entity: backsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&backsimeq", 
+            "description": "Bad named entity: backsimeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&backsimeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&backsimeq;", 
+            "description": "Named entity: backsimeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22cd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&barvee", 
+            "description": "Bad named entity: barvee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&barvee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&barvee;", 
+            "description": "Named entity: barvee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22bd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&barwed", 
+            "description": "Bad named entity: barwed without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&barwed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&barwed;", 
+            "description": "Named entity: barwed; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2305"
+                ]
+            ]
+        }, 
+        {
+            "input": "&barwedge", 
+            "description": "Bad named entity: barwedge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&barwedge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&barwedge;", 
+            "description": "Named entity: barwedge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2305"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bbrk", 
+            "description": "Bad named entity: bbrk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bbrk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bbrk;", 
+            "description": "Named entity: bbrk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bbrktbrk", 
+            "description": "Bad named entity: bbrktbrk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bbrktbrk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bbrktbrk;", 
+            "description": "Named entity: bbrktbrk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23b6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bcong", 
+            "description": "Bad named entity: bcong without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bcong"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bcong;", 
+            "description": "Named entity: bcong; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bcy", 
+            "description": "Bad named entity: bcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bcy;", 
+            "description": "Named entity: bcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0431"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bdquo", 
+            "description": "Bad named entity: bdquo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bdquo"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bdquo;", 
+            "description": "Named entity: bdquo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u201e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&becaus", 
+            "description": "Bad named entity: becaus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&becaus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&becaus;", 
+            "description": "Named entity: becaus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2235"
+                ]
+            ]
+        }, 
+        {
+            "input": "&because", 
+            "description": "Bad named entity: because without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&because"
+                ]
+            ]
+        }, 
+        {
+            "input": "&because;", 
+            "description": "Named entity: because; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2235"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bemptyv", 
+            "description": "Bad named entity: bemptyv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bemptyv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bemptyv;", 
+            "description": "Named entity: bemptyv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29b0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bepsi", 
+            "description": "Bad named entity: bepsi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bepsi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bepsi;", 
+            "description": "Named entity: bepsi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03f6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bernou", 
+            "description": "Bad named entity: bernou without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bernou"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bernou;", 
+            "description": "Named entity: bernou; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u212c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&beta", 
+            "description": "Bad named entity: beta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&beta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&beta;", 
+            "description": "Named entity: beta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03b2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&beth", 
+            "description": "Bad named entity: beth without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&beth"
+                ]
+            ]
+        }, 
+        {
+            "input": "&beth;", 
+            "description": "Named entity: beth; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2136"
+                ]
+            ]
+        }, 
+        {
+            "input": "&between", 
+            "description": "Bad named entity: between without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&between"
+                ]
+            ]
+        }, 
+        {
+            "input": "&between;", 
+            "description": "Named entity: between; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bfr", 
+            "description": "Bad named entity: bfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bfr;", 
+            "description": "Named entity: bfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd1f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigcap", 
+            "description": "Bad named entity: bigcap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigcap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigcap;", 
+            "description": "Named entity: bigcap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigcirc", 
+            "description": "Bad named entity: bigcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigcirc;", 
+            "description": "Named entity: bigcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25ef"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigcup", 
+            "description": "Bad named entity: bigcup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigcup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigcup;", 
+            "description": "Named entity: bigcup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigodot", 
+            "description": "Bad named entity: bigodot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigodot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigodot;", 
+            "description": "Named entity: bigodot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigoplus", 
+            "description": "Bad named entity: bigoplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigoplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigoplus;", 
+            "description": "Named entity: bigoplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a01"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigotimes", 
+            "description": "Bad named entity: bigotimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigotimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigotimes;", 
+            "description": "Named entity: bigotimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a02"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigsqcup", 
+            "description": "Bad named entity: bigsqcup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigsqcup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigsqcup;", 
+            "description": "Named entity: bigsqcup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a06"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigstar", 
+            "description": "Bad named entity: bigstar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigstar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigstar;", 
+            "description": "Named entity: bigstar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2605"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigtriangledown", 
+            "description": "Bad named entity: bigtriangledown without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigtriangledown"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigtriangledown;", 
+            "description": "Named entity: bigtriangledown; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25bd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigtriangleup", 
+            "description": "Bad named entity: bigtriangleup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigtriangleup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigtriangleup;", 
+            "description": "Named entity: bigtriangleup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&biguplus", 
+            "description": "Bad named entity: biguplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&biguplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&biguplus;", 
+            "description": "Named entity: biguplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a04"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigvee", 
+            "description": "Bad named entity: bigvee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigvee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigvee;", 
+            "description": "Named entity: bigvee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigwedge", 
+            "description": "Bad named entity: bigwedge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bigwedge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bigwedge;", 
+            "description": "Named entity: bigwedge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bkarow", 
+            "description": "Bad named entity: bkarow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bkarow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bkarow;", 
+            "description": "Named entity: bkarow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u290d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacklozenge", 
+            "description": "Bad named entity: blacklozenge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&blacklozenge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacklozenge;", 
+            "description": "Named entity: blacklozenge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29eb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacksquare", 
+            "description": "Bad named entity: blacksquare without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&blacksquare"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacksquare;", 
+            "description": "Named entity: blacksquare; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25aa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacktriangle", 
+            "description": "Bad named entity: blacktriangle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&blacktriangle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacktriangle;", 
+            "description": "Named entity: blacktriangle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacktriangledown", 
+            "description": "Bad named entity: blacktriangledown without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&blacktriangledown"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacktriangledown;", 
+            "description": "Named entity: blacktriangledown; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25be"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacktriangleleft", 
+            "description": "Bad named entity: blacktriangleleft without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&blacktriangleleft"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacktriangleleft;", 
+            "description": "Named entity: blacktriangleleft; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacktriangleright", 
+            "description": "Bad named entity: blacktriangleright without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&blacktriangleright"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blacktriangleright;", 
+            "description": "Named entity: blacktriangleright; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blank", 
+            "description": "Bad named entity: blank without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&blank"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blank;", 
+            "description": "Named entity: blank; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2423"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blk12", 
+            "description": "Bad named entity: blk12 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&blk12"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blk12;", 
+            "description": "Named entity: blk12; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2592"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blk14", 
+            "description": "Bad named entity: blk14 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&blk14"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blk14;", 
+            "description": "Named entity: blk14; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2591"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blk34", 
+            "description": "Bad named entity: blk34 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&blk34"
+                ]
+            ]
+        }, 
+        {
+            "input": "&blk34;", 
+            "description": "Named entity: blk34; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2593"
+                ]
+            ]
+        }, 
+        {
+            "input": "&block", 
+            "description": "Bad named entity: block without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&block"
+                ]
+            ]
+        }, 
+        {
+            "input": "&block;", 
+            "description": "Named entity: block; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2588"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bne", 
+            "description": "Bad named entity: bne without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bne"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bne;", 
+            "description": "Named entity: bne; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "=\u20e5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bnequiv", 
+            "description": "Bad named entity: bnequiv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bnequiv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bnequiv;", 
+            "description": "Named entity: bnequiv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2261\u20e5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bnot", 
+            "description": "Bad named entity: bnot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bnot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bnot;", 
+            "description": "Named entity: bnot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2310"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bopf", 
+            "description": "Bad named entity: bopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bopf;", 
+            "description": "Named entity: bopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd53"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bot", 
+            "description": "Bad named entity: bot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bot;", 
+            "description": "Named entity: bot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bottom", 
+            "description": "Bad named entity: bottom without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bottom"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bottom;", 
+            "description": "Named entity: bottom; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bowtie", 
+            "description": "Bad named entity: bowtie without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bowtie"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bowtie;", 
+            "description": "Named entity: bowtie; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxDL", 
+            "description": "Bad named entity: boxDL without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxDL"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxDL;", 
+            "description": "Named entity: boxDL; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2557"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxDR", 
+            "description": "Bad named entity: boxDR without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxDR"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxDR;", 
+            "description": "Named entity: boxDR; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2554"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxDl", 
+            "description": "Bad named entity: boxDl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxDl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxDl;", 
+            "description": "Named entity: boxDl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2556"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxDr", 
+            "description": "Bad named entity: boxDr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxDr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxDr;", 
+            "description": "Named entity: boxDr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2553"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxH", 
+            "description": "Bad named entity: boxH without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxH"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxH;", 
+            "description": "Named entity: boxH; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2550"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxHD", 
+            "description": "Bad named entity: boxHD without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxHD"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxHD;", 
+            "description": "Named entity: boxHD; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2566"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxHU", 
+            "description": "Bad named entity: boxHU without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxHU"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxHU;", 
+            "description": "Named entity: boxHU; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2569"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxHd", 
+            "description": "Bad named entity: boxHd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxHd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxHd;", 
+            "description": "Named entity: boxHd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2564"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxHu", 
+            "description": "Bad named entity: boxHu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxHu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxHu;", 
+            "description": "Named entity: boxHu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2567"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxUL", 
+            "description": "Bad named entity: boxUL without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxUL"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxUL;", 
+            "description": "Named entity: boxUL; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u255d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxUR", 
+            "description": "Bad named entity: boxUR without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxUR"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxUR;", 
+            "description": "Named entity: boxUR; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u255a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxUl", 
+            "description": "Bad named entity: boxUl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxUl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxUl;", 
+            "description": "Named entity: boxUl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u255c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxUr", 
+            "description": "Bad named entity: boxUr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxUr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxUr;", 
+            "description": "Named entity: boxUr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2559"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxV", 
+            "description": "Bad named entity: boxV without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxV"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxV;", 
+            "description": "Named entity: boxV; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2551"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVH", 
+            "description": "Bad named entity: boxVH without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxVH"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVH;", 
+            "description": "Named entity: boxVH; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u256c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVL", 
+            "description": "Bad named entity: boxVL without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxVL"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVL;", 
+            "description": "Named entity: boxVL; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2563"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVR", 
+            "description": "Bad named entity: boxVR without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxVR"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVR;", 
+            "description": "Named entity: boxVR; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2560"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVh", 
+            "description": "Bad named entity: boxVh without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxVh"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVh;", 
+            "description": "Named entity: boxVh; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u256b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVl", 
+            "description": "Bad named entity: boxVl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxVl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVl;", 
+            "description": "Named entity: boxVl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2562"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVr", 
+            "description": "Bad named entity: boxVr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxVr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxVr;", 
+            "description": "Named entity: boxVr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u255f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxbox", 
+            "description": "Bad named entity: boxbox without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxbox"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxbox;", 
+            "description": "Named entity: boxbox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29c9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxdL", 
+            "description": "Bad named entity: boxdL without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxdL"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxdL;", 
+            "description": "Named entity: boxdL; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2555"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxdR", 
+            "description": "Bad named entity: boxdR without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxdR"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxdR;", 
+            "description": "Named entity: boxdR; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2552"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxdl", 
+            "description": "Bad named entity: boxdl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxdl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxdl;", 
+            "description": "Named entity: boxdl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2510"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxdr", 
+            "description": "Bad named entity: boxdr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxdr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxdr;", 
+            "description": "Named entity: boxdr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u250c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxh", 
+            "description": "Bad named entity: boxh without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxh"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxh;", 
+            "description": "Named entity: boxh; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2500"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxhD", 
+            "description": "Bad named entity: boxhD without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxhD"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxhD;", 
+            "description": "Named entity: boxhD; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2565"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxhU", 
+            "description": "Bad named entity: boxhU without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxhU"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxhU;", 
+            "description": "Named entity: boxhU; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2568"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxhd", 
+            "description": "Bad named entity: boxhd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxhd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxhd;", 
+            "description": "Named entity: boxhd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u252c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxhu", 
+            "description": "Bad named entity: boxhu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxhu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxhu;", 
+            "description": "Named entity: boxhu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2534"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxminus", 
+            "description": "Bad named entity: boxminus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxminus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxminus;", 
+            "description": "Named entity: boxminus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u229f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxplus", 
+            "description": "Bad named entity: boxplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxplus;", 
+            "description": "Named entity: boxplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u229e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxtimes", 
+            "description": "Bad named entity: boxtimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxtimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxtimes;", 
+            "description": "Named entity: boxtimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxuL", 
+            "description": "Bad named entity: boxuL without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxuL"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxuL;", 
+            "description": "Named entity: boxuL; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u255b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxuR", 
+            "description": "Bad named entity: boxuR without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxuR"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxuR;", 
+            "description": "Named entity: boxuR; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2558"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxul", 
+            "description": "Bad named entity: boxul without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxul"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxul;", 
+            "description": "Named entity: boxul; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2518"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxur", 
+            "description": "Bad named entity: boxur without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxur"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxur;", 
+            "description": "Named entity: boxur; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2514"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxv", 
+            "description": "Bad named entity: boxv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxv;", 
+            "description": "Named entity: boxv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2502"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvH", 
+            "description": "Bad named entity: boxvH without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxvH"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvH;", 
+            "description": "Named entity: boxvH; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u256a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvL", 
+            "description": "Bad named entity: boxvL without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxvL"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvL;", 
+            "description": "Named entity: boxvL; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2561"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvR", 
+            "description": "Bad named entity: boxvR without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxvR"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvR;", 
+            "description": "Named entity: boxvR; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u255e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvh", 
+            "description": "Bad named entity: boxvh without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxvh"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvh;", 
+            "description": "Named entity: boxvh; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u253c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvl", 
+            "description": "Bad named entity: boxvl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxvl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvl;", 
+            "description": "Named entity: boxvl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2524"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvr", 
+            "description": "Bad named entity: boxvr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&boxvr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&boxvr;", 
+            "description": "Named entity: boxvr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u251c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bprime", 
+            "description": "Bad named entity: bprime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bprime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bprime;", 
+            "description": "Named entity: bprime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2035"
+                ]
+            ]
+        }, 
+        {
+            "input": "&breve", 
+            "description": "Bad named entity: breve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&breve"
+                ]
+            ]
+        }, 
+        {
+            "input": "&breve;", 
+            "description": "Named entity: breve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02d8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&brvbar", 
+            "description": "Named entity: brvbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&brvbar;", 
+            "description": "Named entity: brvbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bscr", 
+            "description": "Bad named entity: bscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bscr;", 
+            "description": "Named entity: bscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcb7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsemi", 
+            "description": "Bad named entity: bsemi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bsemi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsemi;", 
+            "description": "Named entity: bsemi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u204f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsim", 
+            "description": "Bad named entity: bsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsim;", 
+            "description": "Named entity: bsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsime", 
+            "description": "Bad named entity: bsime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bsime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsime;", 
+            "description": "Named entity: bsime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22cd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsol", 
+            "description": "Bad named entity: bsol without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bsol"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsol;", 
+            "description": "Named entity: bsol; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\\"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsolb", 
+            "description": "Bad named entity: bsolb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bsolb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsolb;", 
+            "description": "Named entity: bsolb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29c5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsolhsub", 
+            "description": "Bad named entity: bsolhsub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bsolhsub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bsolhsub;", 
+            "description": "Named entity: bsolhsub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27c8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bull", 
+            "description": "Bad named entity: bull without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bull"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bull;", 
+            "description": "Named entity: bull; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2022"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bullet", 
+            "description": "Bad named entity: bullet without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bullet"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bullet;", 
+            "description": "Named entity: bullet; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2022"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bump", 
+            "description": "Bad named entity: bump without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bump"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bump;", 
+            "description": "Named entity: bump; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bumpE", 
+            "description": "Bad named entity: bumpE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bumpE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bumpE;", 
+            "description": "Named entity: bumpE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bumpe", 
+            "description": "Bad named entity: bumpe without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bumpe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bumpe;", 
+            "description": "Named entity: bumpe; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bumpeq", 
+            "description": "Bad named entity: bumpeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&bumpeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&bumpeq;", 
+            "description": "Named entity: bumpeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cacute", 
+            "description": "Bad named entity: cacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cacute;", 
+            "description": "Named entity: cacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0107"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cap", 
+            "description": "Bad named entity: cap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cap;", 
+            "description": "Named entity: cap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2229"
+                ]
+            ]
+        }, 
+        {
+            "input": "&capand", 
+            "description": "Bad named entity: capand without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&capand"
+                ]
+            ]
+        }, 
+        {
+            "input": "&capand;", 
+            "description": "Named entity: capand; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a44"
+                ]
+            ]
+        }, 
+        {
+            "input": "&capbrcup", 
+            "description": "Bad named entity: capbrcup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&capbrcup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&capbrcup;", 
+            "description": "Named entity: capbrcup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a49"
+                ]
+            ]
+        }, 
+        {
+            "input": "&capcap", 
+            "description": "Bad named entity: capcap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&capcap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&capcap;", 
+            "description": "Named entity: capcap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a4b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&capcup", 
+            "description": "Bad named entity: capcup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&capcup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&capcup;", 
+            "description": "Named entity: capcup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a47"
+                ]
+            ]
+        }, 
+        {
+            "input": "&capdot", 
+            "description": "Bad named entity: capdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&capdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&capdot;", 
+            "description": "Named entity: capdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a40"
+                ]
+            ]
+        }, 
+        {
+            "input": "&caps", 
+            "description": "Bad named entity: caps without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&caps"
+                ]
+            ]
+        }, 
+        {
+            "input": "&caps;", 
+            "description": "Named entity: caps; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2229\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&caret", 
+            "description": "Bad named entity: caret without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&caret"
+                ]
+            ]
+        }, 
+        {
+            "input": "&caret;", 
+            "description": "Named entity: caret; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2041"
+                ]
+            ]
+        }, 
+        {
+            "input": "&caron", 
+            "description": "Bad named entity: caron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&caron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&caron;", 
+            "description": "Named entity: caron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02c7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccaps", 
+            "description": "Bad named entity: ccaps without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ccaps"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccaps;", 
+            "description": "Named entity: ccaps; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a4d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccaron", 
+            "description": "Bad named entity: ccaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ccaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccaron;", 
+            "description": "Named entity: ccaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u010d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccedil", 
+            "description": "Named entity: ccedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00e7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccedil;", 
+            "description": "Named entity: ccedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00e7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccirc", 
+            "description": "Bad named entity: ccirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ccirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccirc;", 
+            "description": "Named entity: ccirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0109"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccups", 
+            "description": "Bad named entity: ccups without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ccups"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccups;", 
+            "description": "Named entity: ccups; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a4c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccupssm", 
+            "description": "Bad named entity: ccupssm without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ccupssm"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ccupssm;", 
+            "description": "Named entity: ccupssm; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a50"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cdot", 
+            "description": "Bad named entity: cdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cdot;", 
+            "description": "Named entity: cdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u010b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cedil", 
+            "description": "Named entity: cedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00b8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cedil;", 
+            "description": "Named entity: cedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cemptyv", 
+            "description": "Bad named entity: cemptyv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cemptyv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cemptyv;", 
+            "description": "Named entity: cemptyv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29b2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cent", 
+            "description": "Named entity: cent without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cent;", 
+            "description": "Named entity: cent; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&centerdot;", 
+            "description": "Named entity: centerdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cfr", 
+            "description": "Bad named entity: cfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cfr;", 
+            "description": "Named entity: cfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd20"
+                ]
+            ]
+        }, 
+        {
+            "input": "&chcy", 
+            "description": "Bad named entity: chcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&chcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&chcy;", 
+            "description": "Named entity: chcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0447"
+                ]
+            ]
+        }, 
+        {
+            "input": "&check", 
+            "description": "Bad named entity: check without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&check"
+                ]
+            ]
+        }, 
+        {
+            "input": "&check;", 
+            "description": "Named entity: check; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2713"
+                ]
+            ]
+        }, 
+        {
+            "input": "&checkmark", 
+            "description": "Bad named entity: checkmark without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&checkmark"
+                ]
+            ]
+        }, 
+        {
+            "input": "&checkmark;", 
+            "description": "Named entity: checkmark; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2713"
+                ]
+            ]
+        }, 
+        {
+            "input": "&chi", 
+            "description": "Bad named entity: chi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&chi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&chi;", 
+            "description": "Named entity: chi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cir", 
+            "description": "Bad named entity: cir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cir;", 
+            "description": "Named entity: cir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25cb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cirE", 
+            "description": "Bad named entity: cirE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cirE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cirE;", 
+            "description": "Named entity: cirE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circ", 
+            "description": "Bad named entity: circ without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&circ"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circ;", 
+            "description": "Named entity: circ; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02c6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circeq", 
+            "description": "Bad named entity: circeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&circeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circeq;", 
+            "description": "Named entity: circeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2257"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circlearrowleft", 
+            "description": "Bad named entity: circlearrowleft without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&circlearrowleft"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circlearrowleft;", 
+            "description": "Named entity: circlearrowleft; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ba"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circlearrowright", 
+            "description": "Bad named entity: circlearrowright without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&circlearrowright"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circlearrowright;", 
+            "description": "Named entity: circlearrowright; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circledR", 
+            "description": "Bad named entity: circledR without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&circledR"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circledR;", 
+            "description": "Named entity: circledR; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circledS", 
+            "description": "Bad named entity: circledS without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&circledS"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circledS;", 
+            "description": "Named entity: circledS; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u24c8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circledast", 
+            "description": "Bad named entity: circledast without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&circledast"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circledast;", 
+            "description": "Named entity: circledast; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u229b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circledcirc", 
+            "description": "Bad named entity: circledcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&circledcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circledcirc;", 
+            "description": "Named entity: circledcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u229a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circleddash", 
+            "description": "Bad named entity: circleddash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&circleddash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&circleddash;", 
+            "description": "Named entity: circleddash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u229d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cire", 
+            "description": "Bad named entity: cire without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cire"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cire;", 
+            "description": "Named entity: cire; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2257"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cirfnint", 
+            "description": "Bad named entity: cirfnint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cirfnint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cirfnint;", 
+            "description": "Named entity: cirfnint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a10"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cirmid", 
+            "description": "Bad named entity: cirmid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cirmid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cirmid;", 
+            "description": "Named entity: cirmid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aef"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cirscir", 
+            "description": "Bad named entity: cirscir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cirscir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cirscir;", 
+            "description": "Named entity: cirscir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&clubs", 
+            "description": "Bad named entity: clubs without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&clubs"
+                ]
+            ]
+        }, 
+        {
+            "input": "&clubs;", 
+            "description": "Named entity: clubs; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2663"
+                ]
+            ]
+        }, 
+        {
+            "input": "&clubsuit", 
+            "description": "Bad named entity: clubsuit without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&clubsuit"
+                ]
+            ]
+        }, 
+        {
+            "input": "&clubsuit;", 
+            "description": "Named entity: clubsuit; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2663"
+                ]
+            ]
+        }, 
+        {
+            "input": "&colon", 
+            "description": "Bad named entity: colon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&colon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&colon;", 
+            "description": "Named entity: colon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    ":"
+                ]
+            ]
+        }, 
+        {
+            "input": "&colone", 
+            "description": "Bad named entity: colone without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&colone"
+                ]
+            ]
+        }, 
+        {
+            "input": "&colone;", 
+            "description": "Named entity: colone; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2254"
+                ]
+            ]
+        }, 
+        {
+            "input": "&coloneq", 
+            "description": "Bad named entity: coloneq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&coloneq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&coloneq;", 
+            "description": "Named entity: coloneq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2254"
+                ]
+            ]
+        }, 
+        {
+            "input": "&comma", 
+            "description": "Bad named entity: comma without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&comma"
+                ]
+            ]
+        }, 
+        {
+            "input": "&comma;", 
+            "description": "Named entity: comma; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    ","
+                ]
+            ]
+        }, 
+        {
+            "input": "&commat", 
+            "description": "Bad named entity: commat without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&commat"
+                ]
+            ]
+        }, 
+        {
+            "input": "&commat;", 
+            "description": "Named entity: commat; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "@"
+                ]
+            ]
+        }, 
+        {
+            "input": "&comp", 
+            "description": "Bad named entity: comp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&comp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&comp;", 
+            "description": "Named entity: comp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2201"
+                ]
+            ]
+        }, 
+        {
+            "input": "&compfn", 
+            "description": "Bad named entity: compfn without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&compfn"
+                ]
+            ]
+        }, 
+        {
+            "input": "&compfn;", 
+            "description": "Named entity: compfn; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2218"
+                ]
+            ]
+        }, 
+        {
+            "input": "&complement", 
+            "description": "Bad named entity: complement without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&complement"
+                ]
+            ]
+        }, 
+        {
+            "input": "&complement;", 
+            "description": "Named entity: complement; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2201"
+                ]
+            ]
+        }, 
+        {
+            "input": "&complexes", 
+            "description": "Bad named entity: complexes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&complexes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&complexes;", 
+            "description": "Named entity: complexes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2102"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cong", 
+            "description": "Bad named entity: cong without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cong"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cong;", 
+            "description": "Named entity: cong; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2245"
+                ]
+            ]
+        }, 
+        {
+            "input": "&congdot", 
+            "description": "Bad named entity: congdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&congdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&congdot;", 
+            "description": "Named entity: congdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a6d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&conint", 
+            "description": "Bad named entity: conint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&conint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&conint;", 
+            "description": "Named entity: conint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&copf", 
+            "description": "Bad named entity: copf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&copf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&copf;", 
+            "description": "Named entity: copf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd54"
+                ]
+            ]
+        }, 
+        {
+            "input": "&coprod", 
+            "description": "Bad named entity: coprod without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&coprod"
+                ]
+            ]
+        }, 
+        {
+            "input": "&coprod;", 
+            "description": "Named entity: coprod; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2210"
+                ]
+            ]
+        }, 
+        {
+            "input": "&copy", 
+            "description": "Named entity: copy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&copy;", 
+            "description": "Named entity: copy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&copysr;", 
+            "description": "Named entity: copysr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2117"
+                ]
+            ]
+        }, 
+        {
+            "input": "&crarr", 
+            "description": "Bad named entity: crarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&crarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&crarr;", 
+            "description": "Named entity: crarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cross", 
+            "description": "Bad named entity: cross without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cross"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cross;", 
+            "description": "Named entity: cross; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2717"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cscr", 
+            "description": "Bad named entity: cscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cscr;", 
+            "description": "Named entity: cscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcb8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&csub", 
+            "description": "Bad named entity: csub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&csub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&csub;", 
+            "description": "Named entity: csub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2acf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&csube", 
+            "description": "Bad named entity: csube without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&csube"
+                ]
+            ]
+        }, 
+        {
+            "input": "&csube;", 
+            "description": "Named entity: csube; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ad1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&csup", 
+            "description": "Bad named entity: csup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&csup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&csup;", 
+            "description": "Named entity: csup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ad0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&csupe", 
+            "description": "Bad named entity: csupe without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&csupe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&csupe;", 
+            "description": "Named entity: csupe; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ad2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ctdot", 
+            "description": "Bad named entity: ctdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ctdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ctdot;", 
+            "description": "Named entity: ctdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ef"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cudarrl", 
+            "description": "Bad named entity: cudarrl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cudarrl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cudarrl;", 
+            "description": "Named entity: cudarrl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2938"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cudarrr", 
+            "description": "Bad named entity: cudarrr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cudarrr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cudarrr;", 
+            "description": "Named entity: cudarrr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2935"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cuepr", 
+            "description": "Bad named entity: cuepr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cuepr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cuepr;", 
+            "description": "Named entity: cuepr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22de"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cuesc", 
+            "description": "Bad named entity: cuesc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cuesc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cuesc;", 
+            "description": "Named entity: cuesc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22df"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cularr", 
+            "description": "Bad named entity: cularr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cularr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cularr;", 
+            "description": "Named entity: cularr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cularrp", 
+            "description": "Bad named entity: cularrp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cularrp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cularrp;", 
+            "description": "Named entity: cularrp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u293d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cup", 
+            "description": "Bad named entity: cup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cup;", 
+            "description": "Named entity: cup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cupbrcap", 
+            "description": "Bad named entity: cupbrcap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cupbrcap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cupbrcap;", 
+            "description": "Named entity: cupbrcap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a48"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cupcap", 
+            "description": "Bad named entity: cupcap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cupcap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cupcap;", 
+            "description": "Named entity: cupcap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a46"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cupcup", 
+            "description": "Bad named entity: cupcup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cupcup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cupcup;", 
+            "description": "Named entity: cupcup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a4a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cupdot", 
+            "description": "Bad named entity: cupdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cupdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cupdot;", 
+            "description": "Named entity: cupdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cupor", 
+            "description": "Bad named entity: cupor without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cupor"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cupor;", 
+            "description": "Named entity: cupor; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a45"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cups", 
+            "description": "Bad named entity: cups without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cups"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cups;", 
+            "description": "Named entity: cups; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222a\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curarr", 
+            "description": "Bad named entity: curarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&curarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curarr;", 
+            "description": "Named entity: curarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curarrm", 
+            "description": "Bad named entity: curarrm without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&curarrm"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curarrm;", 
+            "description": "Named entity: curarrm; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u293c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curlyeqprec", 
+            "description": "Bad named entity: curlyeqprec without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&curlyeqprec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curlyeqprec;", 
+            "description": "Named entity: curlyeqprec; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22de"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curlyeqsucc", 
+            "description": "Bad named entity: curlyeqsucc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&curlyeqsucc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curlyeqsucc;", 
+            "description": "Named entity: curlyeqsucc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22df"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curlyvee", 
+            "description": "Bad named entity: curlyvee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&curlyvee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curlyvee;", 
+            "description": "Named entity: curlyvee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ce"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curlywedge", 
+            "description": "Bad named entity: curlywedge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&curlywedge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curlywedge;", 
+            "description": "Named entity: curlywedge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22cf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curren", 
+            "description": "Named entity: curren without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curren;", 
+            "description": "Named entity: curren; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curvearrowleft", 
+            "description": "Bad named entity: curvearrowleft without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&curvearrowleft"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curvearrowleft;", 
+            "description": "Named entity: curvearrowleft; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curvearrowright", 
+            "description": "Bad named entity: curvearrowright without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&curvearrowright"
+                ]
+            ]
+        }, 
+        {
+            "input": "&curvearrowright;", 
+            "description": "Named entity: curvearrowright; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cuvee", 
+            "description": "Bad named entity: cuvee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cuvee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cuvee;", 
+            "description": "Named entity: cuvee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ce"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cuwed", 
+            "description": "Bad named entity: cuwed without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cuwed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cuwed;", 
+            "description": "Named entity: cuwed; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22cf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cwconint", 
+            "description": "Bad named entity: cwconint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cwconint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cwconint;", 
+            "description": "Named entity: cwconint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2232"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cwint", 
+            "description": "Bad named entity: cwint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cwint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cwint;", 
+            "description": "Named entity: cwint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2231"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cylcty", 
+            "description": "Bad named entity: cylcty without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&cylcty"
+                ]
+            ]
+        }, 
+        {
+            "input": "&cylcty;", 
+            "description": "Named entity: cylcty; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u232d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dArr", 
+            "description": "Bad named entity: dArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dArr;", 
+            "description": "Named entity: dArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dHar", 
+            "description": "Bad named entity: dHar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dHar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dHar;", 
+            "description": "Named entity: dHar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2965"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dagger", 
+            "description": "Bad named entity: dagger without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dagger"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dagger;", 
+            "description": "Named entity: dagger; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2020"
+                ]
+            ]
+        }, 
+        {
+            "input": "&daleth", 
+            "description": "Bad named entity: daleth without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&daleth"
+                ]
+            ]
+        }, 
+        {
+            "input": "&daleth;", 
+            "description": "Named entity: daleth; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2138"
+                ]
+            ]
+        }, 
+        {
+            "input": "&darr", 
+            "description": "Bad named entity: darr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&darr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&darr;", 
+            "description": "Named entity: darr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2193"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dash", 
+            "description": "Bad named entity: dash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dash;", 
+            "description": "Named entity: dash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2010"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dashv", 
+            "description": "Bad named entity: dashv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dashv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dashv;", 
+            "description": "Named entity: dashv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dbkarow", 
+            "description": "Bad named entity: dbkarow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dbkarow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dbkarow;", 
+            "description": "Named entity: dbkarow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u290f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dblac", 
+            "description": "Bad named entity: dblac without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dblac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dblac;", 
+            "description": "Named entity: dblac; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02dd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dcaron", 
+            "description": "Bad named entity: dcaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dcaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dcaron;", 
+            "description": "Named entity: dcaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u010f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dcy", 
+            "description": "Bad named entity: dcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dcy;", 
+            "description": "Named entity: dcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0434"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dd", 
+            "description": "Bad named entity: dd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dd;", 
+            "description": "Named entity: dd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2146"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ddagger", 
+            "description": "Bad named entity: ddagger without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ddagger"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ddagger;", 
+            "description": "Named entity: ddagger; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2021"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ddarr", 
+            "description": "Bad named entity: ddarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ddarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ddarr;", 
+            "description": "Named entity: ddarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ca"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ddotseq", 
+            "description": "Bad named entity: ddotseq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ddotseq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ddotseq;", 
+            "description": "Named entity: ddotseq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a77"
+                ]
+            ]
+        }, 
+        {
+            "input": "&deg", 
+            "description": "Named entity: deg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00b0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&deg;", 
+            "description": "Named entity: deg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&delta", 
+            "description": "Bad named entity: delta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&delta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&delta;", 
+            "description": "Named entity: delta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&demptyv", 
+            "description": "Bad named entity: demptyv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&demptyv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&demptyv;", 
+            "description": "Named entity: demptyv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dfisht", 
+            "description": "Bad named entity: dfisht without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dfisht"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dfisht;", 
+            "description": "Named entity: dfisht; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u297f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dfr", 
+            "description": "Bad named entity: dfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dfr;", 
+            "description": "Named entity: dfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd21"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dharl", 
+            "description": "Bad named entity: dharl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dharl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dharl;", 
+            "description": "Named entity: dharl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dharr", 
+            "description": "Bad named entity: dharr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dharr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dharr;", 
+            "description": "Named entity: dharr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&diam", 
+            "description": "Bad named entity: diam without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&diam"
+                ]
+            ]
+        }, 
+        {
+            "input": "&diam;", 
+            "description": "Named entity: diam; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&diamond", 
+            "description": "Bad named entity: diamond without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&diamond"
+                ]
+            ]
+        }, 
+        {
+            "input": "&diamond;", 
+            "description": "Named entity: diamond; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&diamondsuit", 
+            "description": "Bad named entity: diamondsuit without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&diamondsuit"
+                ]
+            ]
+        }, 
+        {
+            "input": "&diamondsuit;", 
+            "description": "Named entity: diamondsuit; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2666"
+                ]
+            ]
+        }, 
+        {
+            "input": "&diams", 
+            "description": "Bad named entity: diams without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&diams"
+                ]
+            ]
+        }, 
+        {
+            "input": "&diams;", 
+            "description": "Named entity: diams; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2666"
+                ]
+            ]
+        }, 
+        {
+            "input": "&die", 
+            "description": "Bad named entity: die without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&die"
+                ]
+            ]
+        }, 
+        {
+            "input": "&die;", 
+            "description": "Named entity: die; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&digamma", 
+            "description": "Bad named entity: digamma without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&digamma"
+                ]
+            ]
+        }, 
+        {
+            "input": "&digamma;", 
+            "description": "Named entity: digamma; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03dd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&disin", 
+            "description": "Bad named entity: disin without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&disin"
+                ]
+            ]
+        }, 
+        {
+            "input": "&disin;", 
+            "description": "Named entity: disin; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&div", 
+            "description": "Bad named entity: div without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&div"
+                ]
+            ]
+        }, 
+        {
+            "input": "&div;", 
+            "description": "Named entity: div; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&divide", 
+            "description": "Named entity: divide without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00f7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&divide;", 
+            "description": "Named entity: divide; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&divideontimes;", 
+            "description": "Named entity: divideontimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&divonx", 
+            "description": "Bad named entity: divonx without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&divonx"
+                ]
+            ]
+        }, 
+        {
+            "input": "&divonx;", 
+            "description": "Named entity: divonx; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&djcy", 
+            "description": "Bad named entity: djcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&djcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&djcy;", 
+            "description": "Named entity: djcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0452"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dlcorn", 
+            "description": "Bad named entity: dlcorn without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dlcorn"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dlcorn;", 
+            "description": "Named entity: dlcorn; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u231e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dlcrop", 
+            "description": "Bad named entity: dlcrop without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dlcrop"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dlcrop;", 
+            "description": "Named entity: dlcrop; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u230d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dollar", 
+            "description": "Bad named entity: dollar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dollar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dollar;", 
+            "description": "Named entity: dollar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "$"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dopf", 
+            "description": "Bad named entity: dopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dopf;", 
+            "description": "Named entity: dopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd55"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dot", 
+            "description": "Bad named entity: dot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dot;", 
+            "description": "Named entity: dot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02d9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&doteq", 
+            "description": "Bad named entity: doteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&doteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&doteq;", 
+            "description": "Named entity: doteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2250"
+                ]
+            ]
+        }, 
+        {
+            "input": "&doteqdot", 
+            "description": "Bad named entity: doteqdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&doteqdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&doteqdot;", 
+            "description": "Named entity: doteqdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2251"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dotminus", 
+            "description": "Bad named entity: dotminus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dotminus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dotminus;", 
+            "description": "Named entity: dotminus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2238"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dotplus", 
+            "description": "Bad named entity: dotplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dotplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dotplus;", 
+            "description": "Named entity: dotplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2214"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dotsquare", 
+            "description": "Bad named entity: dotsquare without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dotsquare"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dotsquare;", 
+            "description": "Named entity: dotsquare; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&doublebarwedge", 
+            "description": "Bad named entity: doublebarwedge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&doublebarwedge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&doublebarwedge;", 
+            "description": "Named entity: doublebarwedge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2306"
+                ]
+            ]
+        }, 
+        {
+            "input": "&downarrow", 
+            "description": "Bad named entity: downarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&downarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&downarrow;", 
+            "description": "Named entity: downarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2193"
+                ]
+            ]
+        }, 
+        {
+            "input": "&downdownarrows", 
+            "description": "Bad named entity: downdownarrows without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&downdownarrows"
+                ]
+            ]
+        }, 
+        {
+            "input": "&downdownarrows;", 
+            "description": "Named entity: downdownarrows; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ca"
+                ]
+            ]
+        }, 
+        {
+            "input": "&downharpoonleft", 
+            "description": "Bad named entity: downharpoonleft without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&downharpoonleft"
+                ]
+            ]
+        }, 
+        {
+            "input": "&downharpoonleft;", 
+            "description": "Named entity: downharpoonleft; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&downharpoonright", 
+            "description": "Bad named entity: downharpoonright without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&downharpoonright"
+                ]
+            ]
+        }, 
+        {
+            "input": "&downharpoonright;", 
+            "description": "Named entity: downharpoonright; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&drbkarow", 
+            "description": "Bad named entity: drbkarow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&drbkarow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&drbkarow;", 
+            "description": "Named entity: drbkarow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2910"
+                ]
+            ]
+        }, 
+        {
+            "input": "&drcorn", 
+            "description": "Bad named entity: drcorn without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&drcorn"
+                ]
+            ]
+        }, 
+        {
+            "input": "&drcorn;", 
+            "description": "Named entity: drcorn; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u231f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&drcrop", 
+            "description": "Bad named entity: drcrop without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&drcrop"
+                ]
+            ]
+        }, 
+        {
+            "input": "&drcrop;", 
+            "description": "Named entity: drcrop; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u230c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dscr", 
+            "description": "Bad named entity: dscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dscr;", 
+            "description": "Named entity: dscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcb9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dscy", 
+            "description": "Bad named entity: dscy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dscy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dscy;", 
+            "description": "Named entity: dscy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0455"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dsol", 
+            "description": "Bad named entity: dsol without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dsol"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dsol;", 
+            "description": "Named entity: dsol; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29f6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dstrok", 
+            "description": "Bad named entity: dstrok without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dstrok"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dstrok;", 
+            "description": "Named entity: dstrok; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0111"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dtdot", 
+            "description": "Bad named entity: dtdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dtdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dtdot;", 
+            "description": "Named entity: dtdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dtri", 
+            "description": "Bad named entity: dtri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dtri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dtri;", 
+            "description": "Named entity: dtri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25bf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dtrif", 
+            "description": "Bad named entity: dtrif without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dtrif"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dtrif;", 
+            "description": "Named entity: dtrif; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25be"
+                ]
+            ]
+        }, 
+        {
+            "input": "&duarr", 
+            "description": "Bad named entity: duarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&duarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&duarr;", 
+            "description": "Named entity: duarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&duhar", 
+            "description": "Bad named entity: duhar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&duhar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&duhar;", 
+            "description": "Named entity: duhar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u296f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dwangle", 
+            "description": "Bad named entity: dwangle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dwangle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dwangle;", 
+            "description": "Named entity: dwangle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29a6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dzcy", 
+            "description": "Bad named entity: dzcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dzcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dzcy;", 
+            "description": "Named entity: dzcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u045f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dzigrarr", 
+            "description": "Bad named entity: dzigrarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&dzigrarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&dzigrarr;", 
+            "description": "Named entity: dzigrarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27ff"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eDDot", 
+            "description": "Bad named entity: eDDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eDDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eDDot;", 
+            "description": "Named entity: eDDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a77"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eDot", 
+            "description": "Bad named entity: eDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eDot;", 
+            "description": "Named entity: eDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2251"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eacute", 
+            "description": "Named entity: eacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00e9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eacute;", 
+            "description": "Named entity: eacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00e9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&easter", 
+            "description": "Bad named entity: easter without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&easter"
+                ]
+            ]
+        }, 
+        {
+            "input": "&easter;", 
+            "description": "Named entity: easter; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a6e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ecaron", 
+            "description": "Bad named entity: ecaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ecaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ecaron;", 
+            "description": "Named entity: ecaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u011b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ecir", 
+            "description": "Bad named entity: ecir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ecir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ecir;", 
+            "description": "Named entity: ecir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2256"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ecirc", 
+            "description": "Named entity: ecirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ea"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ecirc;", 
+            "description": "Named entity: ecirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ea"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ecolon", 
+            "description": "Bad named entity: ecolon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ecolon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ecolon;", 
+            "description": "Named entity: ecolon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2255"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ecy", 
+            "description": "Bad named entity: ecy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ecy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ecy;", 
+            "description": "Named entity: ecy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u044d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&edot", 
+            "description": "Bad named entity: edot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&edot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&edot;", 
+            "description": "Named entity: edot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0117"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ee", 
+            "description": "Bad named entity: ee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ee;", 
+            "description": "Named entity: ee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2147"
+                ]
+            ]
+        }, 
+        {
+            "input": "&efDot", 
+            "description": "Bad named entity: efDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&efDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&efDot;", 
+            "description": "Named entity: efDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2252"
+                ]
+            ]
+        }, 
+        {
+            "input": "&efr", 
+            "description": "Bad named entity: efr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&efr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&efr;", 
+            "description": "Named entity: efr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd22"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eg", 
+            "description": "Bad named entity: eg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eg;", 
+            "description": "Named entity: eg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a9a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&egrave", 
+            "description": "Named entity: egrave without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00e8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&egrave;", 
+            "description": "Named entity: egrave; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00e8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&egs", 
+            "description": "Bad named entity: egs without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&egs"
+                ]
+            ]
+        }, 
+        {
+            "input": "&egs;", 
+            "description": "Named entity: egs; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a96"
+                ]
+            ]
+        }, 
+        {
+            "input": "&egsdot", 
+            "description": "Bad named entity: egsdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&egsdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&egsdot;", 
+            "description": "Named entity: egsdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a98"
+                ]
+            ]
+        }, 
+        {
+            "input": "&el", 
+            "description": "Bad named entity: el without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&el"
+                ]
+            ]
+        }, 
+        {
+            "input": "&el;", 
+            "description": "Named entity: el; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a99"
+                ]
+            ]
+        }, 
+        {
+            "input": "&elinters", 
+            "description": "Bad named entity: elinters without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&elinters"
+                ]
+            ]
+        }, 
+        {
+            "input": "&elinters;", 
+            "description": "Named entity: elinters; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23e7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ell", 
+            "description": "Bad named entity: ell without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ell"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ell;", 
+            "description": "Named entity: ell; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2113"
+                ]
+            ]
+        }, 
+        {
+            "input": "&els", 
+            "description": "Bad named entity: els without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&els"
+                ]
+            ]
+        }, 
+        {
+            "input": "&els;", 
+            "description": "Named entity: els; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a95"
+                ]
+            ]
+        }, 
+        {
+            "input": "&elsdot", 
+            "description": "Bad named entity: elsdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&elsdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&elsdot;", 
+            "description": "Named entity: elsdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a97"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emacr", 
+            "description": "Bad named entity: emacr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&emacr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emacr;", 
+            "description": "Named entity: emacr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0113"
+                ]
+            ]
+        }, 
+        {
+            "input": "&empty", 
+            "description": "Bad named entity: empty without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&empty"
+                ]
+            ]
+        }, 
+        {
+            "input": "&empty;", 
+            "description": "Named entity: empty; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2205"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emptyset", 
+            "description": "Bad named entity: emptyset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&emptyset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emptyset;", 
+            "description": "Named entity: emptyset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2205"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emptyv", 
+            "description": "Bad named entity: emptyv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&emptyv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emptyv;", 
+            "description": "Named entity: emptyv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2205"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emsp", 
+            "description": "Bad named entity: emsp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&emsp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emsp13", 
+            "description": "Bad named entity: emsp13 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&emsp13"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emsp13;", 
+            "description": "Named entity: emsp13; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2004"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emsp14", 
+            "description": "Bad named entity: emsp14 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&emsp14"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emsp14;", 
+            "description": "Named entity: emsp14; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2005"
+                ]
+            ]
+        }, 
+        {
+            "input": "&emsp;", 
+            "description": "Named entity: emsp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2003"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eng", 
+            "description": "Bad named entity: eng without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eng"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eng;", 
+            "description": "Named entity: eng; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u014b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ensp", 
+            "description": "Bad named entity: ensp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ensp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ensp;", 
+            "description": "Named entity: ensp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2002"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eogon", 
+            "description": "Bad named entity: eogon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eogon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eogon;", 
+            "description": "Named entity: eogon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0119"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eopf", 
+            "description": "Bad named entity: eopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eopf;", 
+            "description": "Named entity: eopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd56"
+                ]
+            ]
+        }, 
+        {
+            "input": "&epar", 
+            "description": "Bad named entity: epar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&epar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&epar;", 
+            "description": "Named entity: epar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eparsl", 
+            "description": "Bad named entity: eparsl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eparsl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eparsl;", 
+            "description": "Named entity: eparsl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29e3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eplus", 
+            "description": "Bad named entity: eplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eplus;", 
+            "description": "Named entity: eplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a71"
+                ]
+            ]
+        }, 
+        {
+            "input": "&epsi", 
+            "description": "Bad named entity: epsi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&epsi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&epsi;", 
+            "description": "Named entity: epsi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&epsilon", 
+            "description": "Bad named entity: epsilon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&epsilon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&epsilon;", 
+            "description": "Named entity: epsilon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&epsiv", 
+            "description": "Bad named entity: epsiv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&epsiv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&epsiv;", 
+            "description": "Named entity: epsiv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqcirc", 
+            "description": "Bad named entity: eqcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eqcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqcirc;", 
+            "description": "Named entity: eqcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2256"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqcolon", 
+            "description": "Bad named entity: eqcolon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eqcolon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqcolon;", 
+            "description": "Named entity: eqcolon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2255"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqsim", 
+            "description": "Bad named entity: eqsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eqsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqsim;", 
+            "description": "Named entity: eqsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2242"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqslantgtr", 
+            "description": "Bad named entity: eqslantgtr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eqslantgtr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqslantgtr;", 
+            "description": "Named entity: eqslantgtr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a96"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqslantless", 
+            "description": "Bad named entity: eqslantless without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eqslantless"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqslantless;", 
+            "description": "Named entity: eqslantless; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a95"
+                ]
+            ]
+        }, 
+        {
+            "input": "&equals", 
+            "description": "Bad named entity: equals without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&equals"
+                ]
+            ]
+        }, 
+        {
+            "input": "&equals;", 
+            "description": "Named entity: equals; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "="
+                ]
+            ]
+        }, 
+        {
+            "input": "&equest", 
+            "description": "Bad named entity: equest without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&equest"
+                ]
+            ]
+        }, 
+        {
+            "input": "&equest;", 
+            "description": "Named entity: equest; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u225f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&equiv", 
+            "description": "Bad named entity: equiv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&equiv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&equiv;", 
+            "description": "Named entity: equiv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2261"
+                ]
+            ]
+        }, 
+        {
+            "input": "&equivDD", 
+            "description": "Bad named entity: equivDD without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&equivDD"
+                ]
+            ]
+        }, 
+        {
+            "input": "&equivDD;", 
+            "description": "Named entity: equivDD; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a78"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqvparsl", 
+            "description": "Bad named entity: eqvparsl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eqvparsl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eqvparsl;", 
+            "description": "Named entity: eqvparsl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29e5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&erDot", 
+            "description": "Bad named entity: erDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&erDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&erDot;", 
+            "description": "Named entity: erDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2253"
+                ]
+            ]
+        }, 
+        {
+            "input": "&erarr", 
+            "description": "Bad named entity: erarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&erarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&erarr;", 
+            "description": "Named entity: erarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2971"
+                ]
+            ]
+        }, 
+        {
+            "input": "&escr", 
+            "description": "Bad named entity: escr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&escr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&escr;", 
+            "description": "Named entity: escr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u212f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&esdot", 
+            "description": "Bad named entity: esdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&esdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&esdot;", 
+            "description": "Named entity: esdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2250"
+                ]
+            ]
+        }, 
+        {
+            "input": "&esim", 
+            "description": "Bad named entity: esim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&esim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&esim;", 
+            "description": "Named entity: esim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2242"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eta", 
+            "description": "Bad named entity: eta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&eta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eta;", 
+            "description": "Named entity: eta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03b7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eth", 
+            "description": "Named entity: eth without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00f0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&eth;", 
+            "description": "Named entity: eth; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&euml", 
+            "description": "Named entity: euml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00eb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&euml;", 
+            "description": "Named entity: euml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00eb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&euro", 
+            "description": "Bad named entity: euro without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&euro"
+                ]
+            ]
+        }, 
+        {
+            "input": "&euro;", 
+            "description": "Named entity: euro; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u20ac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&excl", 
+            "description": "Bad named entity: excl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&excl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&excl;", 
+            "description": "Named entity: excl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "!"
+                ]
+            ]
+        }, 
+        {
+            "input": "&exist", 
+            "description": "Bad named entity: exist without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&exist"
+                ]
+            ]
+        }, 
+        {
+            "input": "&exist;", 
+            "description": "Named entity: exist; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2203"
+                ]
+            ]
+        }, 
+        {
+            "input": "&expectation", 
+            "description": "Bad named entity: expectation without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&expectation"
+                ]
+            ]
+        }, 
+        {
+            "input": "&expectation;", 
+            "description": "Named entity: expectation; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2130"
+                ]
+            ]
+        }, 
+        {
+            "input": "&exponentiale", 
+            "description": "Bad named entity: exponentiale without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&exponentiale"
+                ]
+            ]
+        }, 
+        {
+            "input": "&exponentiale;", 
+            "description": "Named entity: exponentiale; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2147"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fallingdotseq", 
+            "description": "Bad named entity: fallingdotseq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fallingdotseq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fallingdotseq;", 
+            "description": "Named entity: fallingdotseq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2252"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fcy", 
+            "description": "Bad named entity: fcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fcy;", 
+            "description": "Named entity: fcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0444"
+                ]
+            ]
+        }, 
+        {
+            "input": "&female", 
+            "description": "Bad named entity: female without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&female"
+                ]
+            ]
+        }, 
+        {
+            "input": "&female;", 
+            "description": "Named entity: female; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2640"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ffilig", 
+            "description": "Bad named entity: ffilig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ffilig"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ffilig;", 
+            "description": "Named entity: ffilig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ufb03"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fflig", 
+            "description": "Bad named entity: fflig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fflig"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fflig;", 
+            "description": "Named entity: fflig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ufb00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ffllig", 
+            "description": "Bad named entity: ffllig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ffllig"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ffllig;", 
+            "description": "Named entity: ffllig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ufb04"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ffr", 
+            "description": "Bad named entity: ffr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ffr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ffr;", 
+            "description": "Named entity: ffr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd23"
+                ]
+            ]
+        }, 
+        {
+            "input": "&filig", 
+            "description": "Bad named entity: filig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&filig"
+                ]
+            ]
+        }, 
+        {
+            "input": "&filig;", 
+            "description": "Named entity: filig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ufb01"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fjlig", 
+            "description": "Bad named entity: fjlig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fjlig"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fjlig;", 
+            "description": "Named entity: fjlig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "fj"
+                ]
+            ]
+        }, 
+        {
+            "input": "&flat", 
+            "description": "Bad named entity: flat without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&flat"
+                ]
+            ]
+        }, 
+        {
+            "input": "&flat;", 
+            "description": "Named entity: flat; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u266d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fllig", 
+            "description": "Bad named entity: fllig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fllig"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fllig;", 
+            "description": "Named entity: fllig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ufb02"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fltns", 
+            "description": "Bad named entity: fltns without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fltns"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fltns;", 
+            "description": "Named entity: fltns; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fnof", 
+            "description": "Bad named entity: fnof without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fnof"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fnof;", 
+            "description": "Named entity: fnof; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0192"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fopf", 
+            "description": "Bad named entity: fopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fopf;", 
+            "description": "Named entity: fopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd57"
+                ]
+            ]
+        }, 
+        {
+            "input": "&forall", 
+            "description": "Bad named entity: forall without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&forall"
+                ]
+            ]
+        }, 
+        {
+            "input": "&forall;", 
+            "description": "Named entity: forall; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2200"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fork", 
+            "description": "Bad named entity: fork without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fork"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fork;", 
+            "description": "Named entity: fork; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&forkv", 
+            "description": "Bad named entity: forkv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&forkv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&forkv;", 
+            "description": "Named entity: forkv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ad9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fpartint", 
+            "description": "Bad named entity: fpartint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fpartint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fpartint;", 
+            "description": "Named entity: fpartint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a0d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac12", 
+            "description": "Named entity: frac12 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00bd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac12;", 
+            "description": "Named entity: frac12; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00bd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac13", 
+            "description": "Bad named entity: frac13 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac13"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac13;", 
+            "description": "Named entity: frac13; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2153"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac14", 
+            "description": "Named entity: frac14 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00bc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac14;", 
+            "description": "Named entity: frac14; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00bc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac15", 
+            "description": "Bad named entity: frac15 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac15"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac15;", 
+            "description": "Named entity: frac15; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2155"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac16", 
+            "description": "Bad named entity: frac16 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac16"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac16;", 
+            "description": "Named entity: frac16; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2159"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac18", 
+            "description": "Bad named entity: frac18 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac18"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac18;", 
+            "description": "Named entity: frac18; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u215b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac23", 
+            "description": "Bad named entity: frac23 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac23"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac23;", 
+            "description": "Named entity: frac23; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2154"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac25", 
+            "description": "Bad named entity: frac25 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac25"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac25;", 
+            "description": "Named entity: frac25; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2156"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac34", 
+            "description": "Named entity: frac34 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00be"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac34;", 
+            "description": "Named entity: frac34; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00be"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac35", 
+            "description": "Bad named entity: frac35 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac35"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac35;", 
+            "description": "Named entity: frac35; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2157"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac38", 
+            "description": "Bad named entity: frac38 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac38"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac38;", 
+            "description": "Named entity: frac38; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u215c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac45", 
+            "description": "Bad named entity: frac45 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac45"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac45;", 
+            "description": "Named entity: frac45; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2158"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac56", 
+            "description": "Bad named entity: frac56 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac56"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac56;", 
+            "description": "Named entity: frac56; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u215a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac58", 
+            "description": "Bad named entity: frac58 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac58"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac58;", 
+            "description": "Named entity: frac58; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u215d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac78", 
+            "description": "Bad named entity: frac78 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frac78"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frac78;", 
+            "description": "Named entity: frac78; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u215e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frasl", 
+            "description": "Bad named entity: frasl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frasl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frasl;", 
+            "description": "Named entity: frasl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2044"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frown", 
+            "description": "Bad named entity: frown without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&frown"
+                ]
+            ]
+        }, 
+        {
+            "input": "&frown;", 
+            "description": "Named entity: frown; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2322"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fscr", 
+            "description": "Bad named entity: fscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&fscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&fscr;", 
+            "description": "Named entity: fscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcbb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gE", 
+            "description": "Bad named entity: gE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gE;", 
+            "description": "Named entity: gE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2267"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gEl", 
+            "description": "Bad named entity: gEl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gEl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gEl;", 
+            "description": "Named entity: gEl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a8c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gacute", 
+            "description": "Bad named entity: gacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gacute;", 
+            "description": "Named entity: gacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u01f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gamma", 
+            "description": "Bad named entity: gamma without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gamma"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gamma;", 
+            "description": "Named entity: gamma; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03b3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gammad", 
+            "description": "Bad named entity: gammad without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gammad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gammad;", 
+            "description": "Named entity: gammad; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03dd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gap", 
+            "description": "Bad named entity: gap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gap;", 
+            "description": "Named entity: gap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a86"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gbreve", 
+            "description": "Bad named entity: gbreve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gbreve"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gbreve;", 
+            "description": "Named entity: gbreve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u011f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gcirc", 
+            "description": "Bad named entity: gcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gcirc;", 
+            "description": "Named entity: gcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u011d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gcy", 
+            "description": "Bad named entity: gcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gcy;", 
+            "description": "Named entity: gcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0433"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gdot", 
+            "description": "Bad named entity: gdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gdot;", 
+            "description": "Named entity: gdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0121"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ge", 
+            "description": "Bad named entity: ge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ge;", 
+            "description": "Named entity: ge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2265"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gel", 
+            "description": "Bad named entity: gel without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gel"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gel;", 
+            "description": "Named entity: gel; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22db"
+                ]
+            ]
+        }, 
+        {
+            "input": "&geq", 
+            "description": "Bad named entity: geq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&geq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&geq;", 
+            "description": "Named entity: geq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2265"
+                ]
+            ]
+        }, 
+        {
+            "input": "&geqq", 
+            "description": "Bad named entity: geqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&geqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&geqq;", 
+            "description": "Named entity: geqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2267"
+                ]
+            ]
+        }, 
+        {
+            "input": "&geqslant", 
+            "description": "Bad named entity: geqslant without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&geqslant"
+                ]
+            ]
+        }, 
+        {
+            "input": "&geqslant;", 
+            "description": "Named entity: geqslant; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ges", 
+            "description": "Bad named entity: ges without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ges"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ges;", 
+            "description": "Named entity: ges; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gescc", 
+            "description": "Bad named entity: gescc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gescc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gescc;", 
+            "description": "Named entity: gescc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gesdot", 
+            "description": "Bad named entity: gesdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gesdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gesdot;", 
+            "description": "Named entity: gesdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a80"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gesdoto", 
+            "description": "Bad named entity: gesdoto without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gesdoto"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gesdoto;", 
+            "description": "Named entity: gesdoto; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a82"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gesdotol", 
+            "description": "Bad named entity: gesdotol without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gesdotol"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gesdotol;", 
+            "description": "Named entity: gesdotol; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a84"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gesl", 
+            "description": "Bad named entity: gesl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gesl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gesl;", 
+            "description": "Named entity: gesl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22db\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gesles", 
+            "description": "Bad named entity: gesles without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gesles"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gesles;", 
+            "description": "Named entity: gesles; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a94"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gfr", 
+            "description": "Bad named entity: gfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gfr;", 
+            "description": "Named entity: gfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd24"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gg", 
+            "description": "Bad named entity: gg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gg;", 
+            "description": "Named entity: gg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ggg", 
+            "description": "Bad named entity: ggg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ggg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ggg;", 
+            "description": "Named entity: ggg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gimel", 
+            "description": "Bad named entity: gimel without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gimel"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gimel;", 
+            "description": "Named entity: gimel; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2137"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gjcy", 
+            "description": "Bad named entity: gjcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gjcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gjcy;", 
+            "description": "Named entity: gjcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0453"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gl", 
+            "description": "Bad named entity: gl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gl;", 
+            "description": "Named entity: gl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2277"
+                ]
+            ]
+        }, 
+        {
+            "input": "&glE", 
+            "description": "Bad named entity: glE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&glE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&glE;", 
+            "description": "Named entity: glE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a92"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gla", 
+            "description": "Bad named entity: gla without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gla"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gla;", 
+            "description": "Named entity: gla; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&glj", 
+            "description": "Bad named entity: glj without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&glj"
+                ]
+            ]
+        }, 
+        {
+            "input": "&glj;", 
+            "description": "Named entity: glj; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gnE", 
+            "description": "Bad named entity: gnE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gnE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gnE;", 
+            "description": "Named entity: gnE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2269"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gnap", 
+            "description": "Bad named entity: gnap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gnap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gnap;", 
+            "description": "Named entity: gnap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a8a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gnapprox", 
+            "description": "Bad named entity: gnapprox without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gnapprox"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gnapprox;", 
+            "description": "Named entity: gnapprox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a8a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gne", 
+            "description": "Bad named entity: gne without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gne"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gne;", 
+            "description": "Named entity: gne; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a88"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gneq", 
+            "description": "Bad named entity: gneq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gneq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gneq;", 
+            "description": "Named entity: gneq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a88"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gneqq", 
+            "description": "Bad named entity: gneqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gneqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gneqq;", 
+            "description": "Named entity: gneqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2269"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gnsim", 
+            "description": "Bad named entity: gnsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gnsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gnsim;", 
+            "description": "Named entity: gnsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gopf", 
+            "description": "Bad named entity: gopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gopf;", 
+            "description": "Named entity: gopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd58"
+                ]
+            ]
+        }, 
+        {
+            "input": "&grave", 
+            "description": "Bad named entity: grave without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&grave"
+                ]
+            ]
+        }, 
+        {
+            "input": "&grave;", 
+            "description": "Named entity: grave; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "`"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gscr", 
+            "description": "Bad named entity: gscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gscr;", 
+            "description": "Named entity: gscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gsim", 
+            "description": "Bad named entity: gsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gsim;", 
+            "description": "Named entity: gsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2273"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gsime", 
+            "description": "Bad named entity: gsime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gsime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gsime;", 
+            "description": "Named entity: gsime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a8e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gsiml", 
+            "description": "Bad named entity: gsiml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gsiml"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gsiml;", 
+            "description": "Named entity: gsiml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a90"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gt", 
+            "description": "Named entity: gt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    ">"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gt;", 
+            "description": "Named entity: gt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    ">"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtcc;", 
+            "description": "Named entity: gtcc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtcir;", 
+            "description": "Named entity: gtcir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtdot;", 
+            "description": "Named entity: gtdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtlPar;", 
+            "description": "Named entity: gtlPar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2995"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtquest;", 
+            "description": "Named entity: gtquest; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtrapprox;", 
+            "description": "Named entity: gtrapprox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a86"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtrarr;", 
+            "description": "Named entity: gtrarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2978"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtrdot;", 
+            "description": "Named entity: gtrdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtreqless;", 
+            "description": "Named entity: gtreqless; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22db"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtreqqless;", 
+            "description": "Named entity: gtreqqless; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a8c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtrless;", 
+            "description": "Named entity: gtrless; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2277"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gtrsim;", 
+            "description": "Named entity: gtrsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2273"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gvertneqq", 
+            "description": "Bad named entity: gvertneqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gvertneqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gvertneqq;", 
+            "description": "Named entity: gvertneqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2269\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gvnE", 
+            "description": "Bad named entity: gvnE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&gvnE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&gvnE;", 
+            "description": "Named entity: gvnE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2269\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hArr", 
+            "description": "Bad named entity: hArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hArr;", 
+            "description": "Named entity: hArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hairsp", 
+            "description": "Bad named entity: hairsp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hairsp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hairsp;", 
+            "description": "Named entity: hairsp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&half", 
+            "description": "Bad named entity: half without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&half"
+                ]
+            ]
+        }, 
+        {
+            "input": "&half;", 
+            "description": "Named entity: half; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00bd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hamilt", 
+            "description": "Bad named entity: hamilt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hamilt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hamilt;", 
+            "description": "Named entity: hamilt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hardcy", 
+            "description": "Bad named entity: hardcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hardcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hardcy;", 
+            "description": "Named entity: hardcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u044a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&harr", 
+            "description": "Bad named entity: harr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&harr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&harr;", 
+            "description": "Named entity: harr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2194"
+                ]
+            ]
+        }, 
+        {
+            "input": "&harrcir", 
+            "description": "Bad named entity: harrcir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&harrcir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&harrcir;", 
+            "description": "Named entity: harrcir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2948"
+                ]
+            ]
+        }, 
+        {
+            "input": "&harrw", 
+            "description": "Bad named entity: harrw without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&harrw"
+                ]
+            ]
+        }, 
+        {
+            "input": "&harrw;", 
+            "description": "Named entity: harrw; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hbar", 
+            "description": "Bad named entity: hbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hbar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hbar;", 
+            "description": "Named entity: hbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hcirc", 
+            "description": "Bad named entity: hcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hcirc;", 
+            "description": "Named entity: hcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0125"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hearts", 
+            "description": "Bad named entity: hearts without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hearts"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hearts;", 
+            "description": "Named entity: hearts; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2665"
+                ]
+            ]
+        }, 
+        {
+            "input": "&heartsuit", 
+            "description": "Bad named entity: heartsuit without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&heartsuit"
+                ]
+            ]
+        }, 
+        {
+            "input": "&heartsuit;", 
+            "description": "Named entity: heartsuit; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2665"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hellip", 
+            "description": "Bad named entity: hellip without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hellip"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hellip;", 
+            "description": "Named entity: hellip; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2026"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hercon", 
+            "description": "Bad named entity: hercon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hercon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hercon;", 
+            "description": "Named entity: hercon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hfr", 
+            "description": "Bad named entity: hfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hfr;", 
+            "description": "Named entity: hfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd25"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hksearow", 
+            "description": "Bad named entity: hksearow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hksearow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hksearow;", 
+            "description": "Named entity: hksearow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2925"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hkswarow", 
+            "description": "Bad named entity: hkswarow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hkswarow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hkswarow;", 
+            "description": "Named entity: hkswarow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2926"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hoarr", 
+            "description": "Bad named entity: hoarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hoarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hoarr;", 
+            "description": "Named entity: hoarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ff"
+                ]
+            ]
+        }, 
+        {
+            "input": "&homtht", 
+            "description": "Bad named entity: homtht without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&homtht"
+                ]
+            ]
+        }, 
+        {
+            "input": "&homtht;", 
+            "description": "Named entity: homtht; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hookleftarrow", 
+            "description": "Bad named entity: hookleftarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hookleftarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hookleftarrow;", 
+            "description": "Named entity: hookleftarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hookrightarrow", 
+            "description": "Bad named entity: hookrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hookrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hookrightarrow;", 
+            "description": "Named entity: hookrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21aa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hopf", 
+            "description": "Bad named entity: hopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hopf;", 
+            "description": "Named entity: hopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd59"
+                ]
+            ]
+        }, 
+        {
+            "input": "&horbar", 
+            "description": "Bad named entity: horbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&horbar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&horbar;", 
+            "description": "Named entity: horbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2015"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hscr", 
+            "description": "Bad named entity: hscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hscr;", 
+            "description": "Named entity: hscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcbd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hslash", 
+            "description": "Bad named entity: hslash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hslash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hslash;", 
+            "description": "Named entity: hslash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hstrok", 
+            "description": "Bad named entity: hstrok without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hstrok"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hstrok;", 
+            "description": "Named entity: hstrok; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0127"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hybull", 
+            "description": "Bad named entity: hybull without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hybull"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hybull;", 
+            "description": "Named entity: hybull; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2043"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hyphen", 
+            "description": "Bad named entity: hyphen without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&hyphen"
+                ]
+            ]
+        }, 
+        {
+            "input": "&hyphen;", 
+            "description": "Named entity: hyphen; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2010"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iacute", 
+            "description": "Named entity: iacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iacute;", 
+            "description": "Named entity: iacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ic", 
+            "description": "Bad named entity: ic without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ic"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ic;", 
+            "description": "Named entity: ic; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2063"
+                ]
+            ]
+        }, 
+        {
+            "input": "&icirc", 
+            "description": "Named entity: icirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&icirc;", 
+            "description": "Named entity: icirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&icy", 
+            "description": "Bad named entity: icy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&icy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&icy;", 
+            "description": "Named entity: icy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0438"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iecy", 
+            "description": "Bad named entity: iecy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iecy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iecy;", 
+            "description": "Named entity: iecy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0435"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iexcl", 
+            "description": "Named entity: iexcl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iexcl;", 
+            "description": "Named entity: iexcl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iff", 
+            "description": "Bad named entity: iff without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iff"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iff;", 
+            "description": "Named entity: iff; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ifr", 
+            "description": "Bad named entity: ifr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ifr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ifr;", 
+            "description": "Named entity: ifr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd26"
+                ]
+            ]
+        }, 
+        {
+            "input": "&igrave", 
+            "description": "Named entity: igrave without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&igrave;", 
+            "description": "Named entity: igrave; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ii", 
+            "description": "Bad named entity: ii without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ii"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ii;", 
+            "description": "Named entity: ii; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2148"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iiiint", 
+            "description": "Bad named entity: iiiint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iiiint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iiiint;", 
+            "description": "Named entity: iiiint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a0c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iiint", 
+            "description": "Bad named entity: iiint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iiint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iiint;", 
+            "description": "Named entity: iiint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iinfin", 
+            "description": "Bad named entity: iinfin without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iinfin"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iinfin;", 
+            "description": "Named entity: iinfin; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29dc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iiota", 
+            "description": "Bad named entity: iiota without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iiota"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iiota;", 
+            "description": "Named entity: iiota; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2129"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ijlig", 
+            "description": "Bad named entity: ijlig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ijlig"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ijlig;", 
+            "description": "Named entity: ijlig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0133"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imacr", 
+            "description": "Bad named entity: imacr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&imacr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imacr;", 
+            "description": "Named entity: imacr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u012b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&image", 
+            "description": "Bad named entity: image without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&image"
+                ]
+            ]
+        }, 
+        {
+            "input": "&image;", 
+            "description": "Named entity: image; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2111"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imagline", 
+            "description": "Bad named entity: imagline without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&imagline"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imagline;", 
+            "description": "Named entity: imagline; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2110"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imagpart", 
+            "description": "Bad named entity: imagpart without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&imagpart"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imagpart;", 
+            "description": "Named entity: imagpart; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2111"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imath", 
+            "description": "Bad named entity: imath without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&imath"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imath;", 
+            "description": "Named entity: imath; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0131"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imof", 
+            "description": "Bad named entity: imof without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&imof"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imof;", 
+            "description": "Named entity: imof; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imped", 
+            "description": "Bad named entity: imped without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&imped"
+                ]
+            ]
+        }, 
+        {
+            "input": "&imped;", 
+            "description": "Named entity: imped; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u01b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&in", 
+            "description": "Bad named entity: in without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&in"
+                ]
+            ]
+        }, 
+        {
+            "input": "&in;", 
+            "description": "Named entity: in; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2208"
+                ]
+            ]
+        }, 
+        {
+            "input": "&incare", 
+            "description": "Bad named entity: incare without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&incare"
+                ]
+            ]
+        }, 
+        {
+            "input": "&incare;", 
+            "description": "Named entity: incare; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2105"
+                ]
+            ]
+        }, 
+        {
+            "input": "&infin", 
+            "description": "Bad named entity: infin without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&infin"
+                ]
+            ]
+        }, 
+        {
+            "input": "&infin;", 
+            "description": "Named entity: infin; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u221e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&infintie", 
+            "description": "Bad named entity: infintie without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&infintie"
+                ]
+            ]
+        }, 
+        {
+            "input": "&infintie;", 
+            "description": "Named entity: infintie; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29dd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&inodot", 
+            "description": "Bad named entity: inodot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&inodot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&inodot;", 
+            "description": "Named entity: inodot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0131"
+                ]
+            ]
+        }, 
+        {
+            "input": "&int", 
+            "description": "Bad named entity: int without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&int"
+                ]
+            ]
+        }, 
+        {
+            "input": "&int;", 
+            "description": "Named entity: int; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&intcal", 
+            "description": "Bad named entity: intcal without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&intcal"
+                ]
+            ]
+        }, 
+        {
+            "input": "&intcal;", 
+            "description": "Named entity: intcal; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ba"
+                ]
+            ]
+        }, 
+        {
+            "input": "&integers", 
+            "description": "Bad named entity: integers without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&integers"
+                ]
+            ]
+        }, 
+        {
+            "input": "&integers;", 
+            "description": "Named entity: integers; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2124"
+                ]
+            ]
+        }, 
+        {
+            "input": "&intercal", 
+            "description": "Bad named entity: intercal without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&intercal"
+                ]
+            ]
+        }, 
+        {
+            "input": "&intercal;", 
+            "description": "Named entity: intercal; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ba"
+                ]
+            ]
+        }, 
+        {
+            "input": "&intlarhk", 
+            "description": "Bad named entity: intlarhk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&intlarhk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&intlarhk;", 
+            "description": "Named entity: intlarhk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a17"
+                ]
+            ]
+        }, 
+        {
+            "input": "&intprod", 
+            "description": "Bad named entity: intprod without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&intprod"
+                ]
+            ]
+        }, 
+        {
+            "input": "&intprod;", 
+            "description": "Named entity: intprod; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a3c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iocy", 
+            "description": "Bad named entity: iocy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iocy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iocy;", 
+            "description": "Named entity: iocy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0451"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iogon", 
+            "description": "Bad named entity: iogon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iogon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iogon;", 
+            "description": "Named entity: iogon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u012f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iopf", 
+            "description": "Bad named entity: iopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iopf;", 
+            "description": "Named entity: iopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd5a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iota", 
+            "description": "Bad named entity: iota without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iota"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iota;", 
+            "description": "Named entity: iota; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03b9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iprod", 
+            "description": "Bad named entity: iprod without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iprod"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iprod;", 
+            "description": "Named entity: iprod; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a3c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iquest", 
+            "description": "Named entity: iquest without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00bf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iquest;", 
+            "description": "Named entity: iquest; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00bf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iscr", 
+            "description": "Bad named entity: iscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iscr;", 
+            "description": "Named entity: iscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcbe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isin", 
+            "description": "Bad named entity: isin without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&isin"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isin;", 
+            "description": "Named entity: isin; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2208"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isinE", 
+            "description": "Bad named entity: isinE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&isinE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isinE;", 
+            "description": "Named entity: isinE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isindot", 
+            "description": "Bad named entity: isindot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&isindot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isindot;", 
+            "description": "Named entity: isindot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isins", 
+            "description": "Bad named entity: isins without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&isins"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isins;", 
+            "description": "Named entity: isins; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isinsv", 
+            "description": "Bad named entity: isinsv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&isinsv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isinsv;", 
+            "description": "Named entity: isinsv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isinv", 
+            "description": "Bad named entity: isinv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&isinv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&isinv;", 
+            "description": "Named entity: isinv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2208"
+                ]
+            ]
+        }, 
+        {
+            "input": "&it", 
+            "description": "Bad named entity: it without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&it"
+                ]
+            ]
+        }, 
+        {
+            "input": "&it;", 
+            "description": "Named entity: it; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2062"
+                ]
+            ]
+        }, 
+        {
+            "input": "&itilde", 
+            "description": "Bad named entity: itilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&itilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&itilde;", 
+            "description": "Named entity: itilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0129"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iukcy", 
+            "description": "Bad named entity: iukcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&iukcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iukcy;", 
+            "description": "Named entity: iukcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0456"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iuml", 
+            "description": "Named entity: iuml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ef"
+                ]
+            ]
+        }, 
+        {
+            "input": "&iuml;", 
+            "description": "Named entity: iuml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ef"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jcirc", 
+            "description": "Bad named entity: jcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&jcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jcirc;", 
+            "description": "Named entity: jcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0135"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jcy", 
+            "description": "Bad named entity: jcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&jcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jcy;", 
+            "description": "Named entity: jcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0439"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jfr", 
+            "description": "Bad named entity: jfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&jfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jfr;", 
+            "description": "Named entity: jfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd27"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jmath", 
+            "description": "Bad named entity: jmath without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&jmath"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jmath;", 
+            "description": "Named entity: jmath; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0237"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jopf", 
+            "description": "Bad named entity: jopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&jopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jopf;", 
+            "description": "Named entity: jopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd5b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jscr", 
+            "description": "Bad named entity: jscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&jscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jscr;", 
+            "description": "Named entity: jscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcbf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jsercy", 
+            "description": "Bad named entity: jsercy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&jsercy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jsercy;", 
+            "description": "Named entity: jsercy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0458"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jukcy", 
+            "description": "Bad named entity: jukcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&jukcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&jukcy;", 
+            "description": "Named entity: jukcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0454"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kappa", 
+            "description": "Bad named entity: kappa without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&kappa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kappa;", 
+            "description": "Named entity: kappa; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03ba"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kappav", 
+            "description": "Bad named entity: kappav without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&kappav"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kappav;", 
+            "description": "Named entity: kappav; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03f0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kcedil", 
+            "description": "Bad named entity: kcedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&kcedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kcedil;", 
+            "description": "Named entity: kcedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0137"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kcy", 
+            "description": "Bad named entity: kcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&kcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kcy;", 
+            "description": "Named entity: kcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u043a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kfr", 
+            "description": "Bad named entity: kfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&kfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kfr;", 
+            "description": "Named entity: kfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd28"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kgreen", 
+            "description": "Bad named entity: kgreen without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&kgreen"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kgreen;", 
+            "description": "Named entity: kgreen; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0138"
+                ]
+            ]
+        }, 
+        {
+            "input": "&khcy", 
+            "description": "Bad named entity: khcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&khcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&khcy;", 
+            "description": "Named entity: khcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0445"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kjcy", 
+            "description": "Bad named entity: kjcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&kjcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kjcy;", 
+            "description": "Named entity: kjcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u045c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kopf", 
+            "description": "Bad named entity: kopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&kopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kopf;", 
+            "description": "Named entity: kopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd5c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kscr", 
+            "description": "Bad named entity: kscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&kscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&kscr;", 
+            "description": "Named entity: kscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcc0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lAarr", 
+            "description": "Bad named entity: lAarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lAarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lAarr;", 
+            "description": "Named entity: lAarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21da"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lArr", 
+            "description": "Bad named entity: lArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lArr;", 
+            "description": "Named entity: lArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lAtail", 
+            "description": "Bad named entity: lAtail without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lAtail"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lAtail;", 
+            "description": "Named entity: lAtail; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u291b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lBarr", 
+            "description": "Bad named entity: lBarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lBarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lBarr;", 
+            "description": "Named entity: lBarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u290e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lE", 
+            "description": "Bad named entity: lE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lE;", 
+            "description": "Named entity: lE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2266"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lEg", 
+            "description": "Bad named entity: lEg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lEg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lEg;", 
+            "description": "Named entity: lEg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a8b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lHar", 
+            "description": "Bad named entity: lHar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lHar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lHar;", 
+            "description": "Named entity: lHar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2962"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lacute", 
+            "description": "Bad named entity: lacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lacute;", 
+            "description": "Named entity: lacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u013a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&laemptyv", 
+            "description": "Bad named entity: laemptyv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&laemptyv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&laemptyv;", 
+            "description": "Named entity: laemptyv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lagran", 
+            "description": "Bad named entity: lagran without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lagran"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lagran;", 
+            "description": "Named entity: lagran; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2112"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lambda", 
+            "description": "Bad named entity: lambda without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lambda"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lambda;", 
+            "description": "Named entity: lambda; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03bb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lang", 
+            "description": "Bad named entity: lang without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lang"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lang;", 
+            "description": "Named entity: lang; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27e8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&langd", 
+            "description": "Bad named entity: langd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&langd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&langd;", 
+            "description": "Named entity: langd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2991"
+                ]
+            ]
+        }, 
+        {
+            "input": "&langle", 
+            "description": "Bad named entity: langle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&langle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&langle;", 
+            "description": "Named entity: langle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27e8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lap", 
+            "description": "Bad named entity: lap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lap;", 
+            "description": "Named entity: lap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a85"
+                ]
+            ]
+        }, 
+        {
+            "input": "&laquo", 
+            "description": "Named entity: laquo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&laquo;", 
+            "description": "Named entity: laquo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larr", 
+            "description": "Bad named entity: larr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&larr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larr;", 
+            "description": "Named entity: larr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2190"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrb", 
+            "description": "Bad named entity: larrb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&larrb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrb;", 
+            "description": "Named entity: larrb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21e4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrbfs", 
+            "description": "Bad named entity: larrbfs without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&larrbfs"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrbfs;", 
+            "description": "Named entity: larrbfs; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u291f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrfs", 
+            "description": "Bad named entity: larrfs without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&larrfs"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrfs;", 
+            "description": "Named entity: larrfs; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u291d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrhk", 
+            "description": "Bad named entity: larrhk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&larrhk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrhk;", 
+            "description": "Named entity: larrhk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrlp", 
+            "description": "Bad named entity: larrlp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&larrlp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrlp;", 
+            "description": "Named entity: larrlp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrpl", 
+            "description": "Bad named entity: larrpl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&larrpl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrpl;", 
+            "description": "Named entity: larrpl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2939"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrsim", 
+            "description": "Bad named entity: larrsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&larrsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrsim;", 
+            "description": "Named entity: larrsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2973"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrtl", 
+            "description": "Bad named entity: larrtl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&larrtl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&larrtl;", 
+            "description": "Named entity: larrtl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lat", 
+            "description": "Bad named entity: lat without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lat"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lat;", 
+            "description": "Named entity: lat; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&latail", 
+            "description": "Bad named entity: latail without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&latail"
+                ]
+            ]
+        }, 
+        {
+            "input": "&latail;", 
+            "description": "Named entity: latail; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2919"
+                ]
+            ]
+        }, 
+        {
+            "input": "&late", 
+            "description": "Bad named entity: late without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&late"
+                ]
+            ]
+        }, 
+        {
+            "input": "&late;", 
+            "description": "Named entity: late; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lates", 
+            "description": "Bad named entity: lates without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lates"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lates;", 
+            "description": "Named entity: lates; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aad\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbarr", 
+            "description": "Bad named entity: lbarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lbarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbarr;", 
+            "description": "Named entity: lbarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u290c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbbrk", 
+            "description": "Bad named entity: lbbrk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lbbrk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbbrk;", 
+            "description": "Named entity: lbbrk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2772"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbrace", 
+            "description": "Bad named entity: lbrace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lbrace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbrace;", 
+            "description": "Named entity: lbrace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "{"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbrack", 
+            "description": "Bad named entity: lbrack without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lbrack"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbrack;", 
+            "description": "Named entity: lbrack; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "["
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbrke", 
+            "description": "Bad named entity: lbrke without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lbrke"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbrke;", 
+            "description": "Named entity: lbrke; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u298b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbrksld", 
+            "description": "Bad named entity: lbrksld without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lbrksld"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbrksld;", 
+            "description": "Named entity: lbrksld; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u298f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbrkslu", 
+            "description": "Bad named entity: lbrkslu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lbrkslu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lbrkslu;", 
+            "description": "Named entity: lbrkslu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u298d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lcaron", 
+            "description": "Bad named entity: lcaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lcaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lcaron;", 
+            "description": "Named entity: lcaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u013e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lcedil", 
+            "description": "Bad named entity: lcedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lcedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lcedil;", 
+            "description": "Named entity: lcedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u013c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lceil", 
+            "description": "Bad named entity: lceil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lceil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lceil;", 
+            "description": "Named entity: lceil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2308"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lcub", 
+            "description": "Bad named entity: lcub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lcub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lcub;", 
+            "description": "Named entity: lcub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "{"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lcy", 
+            "description": "Bad named entity: lcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lcy;", 
+            "description": "Named entity: lcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u043b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldca", 
+            "description": "Bad named entity: ldca without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ldca"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldca;", 
+            "description": "Named entity: ldca; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2936"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldquo", 
+            "description": "Bad named entity: ldquo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ldquo"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldquo;", 
+            "description": "Named entity: ldquo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u201c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldquor", 
+            "description": "Bad named entity: ldquor without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ldquor"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldquor;", 
+            "description": "Named entity: ldquor; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u201e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldrdhar", 
+            "description": "Bad named entity: ldrdhar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ldrdhar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldrdhar;", 
+            "description": "Named entity: ldrdhar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2967"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldrushar", 
+            "description": "Bad named entity: ldrushar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ldrushar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldrushar;", 
+            "description": "Named entity: ldrushar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u294b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldsh", 
+            "description": "Bad named entity: ldsh without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ldsh"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ldsh;", 
+            "description": "Named entity: ldsh; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&le", 
+            "description": "Bad named entity: le without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&le"
+                ]
+            ]
+        }, 
+        {
+            "input": "&le;", 
+            "description": "Named entity: le; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2264"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftarrow", 
+            "description": "Bad named entity: leftarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leftarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftarrow;", 
+            "description": "Named entity: leftarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2190"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftarrowtail", 
+            "description": "Bad named entity: leftarrowtail without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leftarrowtail"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftarrowtail;", 
+            "description": "Named entity: leftarrowtail; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftharpoondown", 
+            "description": "Bad named entity: leftharpoondown without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leftharpoondown"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftharpoondown;", 
+            "description": "Named entity: leftharpoondown; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftharpoonup", 
+            "description": "Bad named entity: leftharpoonup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leftharpoonup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftharpoonup;", 
+            "description": "Named entity: leftharpoonup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftleftarrows", 
+            "description": "Bad named entity: leftleftarrows without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leftleftarrows"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftleftarrows;", 
+            "description": "Named entity: leftleftarrows; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftrightarrow", 
+            "description": "Bad named entity: leftrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leftrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftrightarrow;", 
+            "description": "Named entity: leftrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2194"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftrightarrows", 
+            "description": "Bad named entity: leftrightarrows without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leftrightarrows"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftrightarrows;", 
+            "description": "Named entity: leftrightarrows; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftrightharpoons", 
+            "description": "Bad named entity: leftrightharpoons without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leftrightharpoons"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftrightharpoons;", 
+            "description": "Named entity: leftrightharpoons; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21cb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftrightsquigarrow", 
+            "description": "Bad named entity: leftrightsquigarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leftrightsquigarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftrightsquigarrow;", 
+            "description": "Named entity: leftrightsquigarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftthreetimes", 
+            "description": "Bad named entity: leftthreetimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leftthreetimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leftthreetimes;", 
+            "description": "Named entity: leftthreetimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22cb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leg", 
+            "description": "Bad named entity: leg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leg;", 
+            "description": "Named entity: leg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22da"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leq", 
+            "description": "Bad named entity: leq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leq;", 
+            "description": "Named entity: leq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2264"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leqq", 
+            "description": "Bad named entity: leqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leqq;", 
+            "description": "Named entity: leqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2266"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leqslant", 
+            "description": "Bad named entity: leqslant without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&leqslant"
+                ]
+            ]
+        }, 
+        {
+            "input": "&leqslant;", 
+            "description": "Named entity: leqslant; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&les", 
+            "description": "Bad named entity: les without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&les"
+                ]
+            ]
+        }, 
+        {
+            "input": "&les;", 
+            "description": "Named entity: les; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lescc", 
+            "description": "Bad named entity: lescc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lescc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lescc;", 
+            "description": "Named entity: lescc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesdot", 
+            "description": "Bad named entity: lesdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lesdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesdot;", 
+            "description": "Named entity: lesdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesdoto", 
+            "description": "Bad named entity: lesdoto without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lesdoto"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesdoto;", 
+            "description": "Named entity: lesdoto; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a81"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesdotor", 
+            "description": "Bad named entity: lesdotor without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lesdotor"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesdotor;", 
+            "description": "Named entity: lesdotor; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a83"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesg", 
+            "description": "Bad named entity: lesg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lesg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesg;", 
+            "description": "Named entity: lesg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22da\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesges", 
+            "description": "Bad named entity: lesges without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lesges"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesges;", 
+            "description": "Named entity: lesges; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a93"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lessapprox", 
+            "description": "Bad named entity: lessapprox without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lessapprox"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lessapprox;", 
+            "description": "Named entity: lessapprox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a85"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lessdot", 
+            "description": "Bad named entity: lessdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lessdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lessdot;", 
+            "description": "Named entity: lessdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesseqgtr", 
+            "description": "Bad named entity: lesseqgtr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lesseqgtr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesseqgtr;", 
+            "description": "Named entity: lesseqgtr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22da"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesseqqgtr", 
+            "description": "Bad named entity: lesseqqgtr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lesseqqgtr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesseqqgtr;", 
+            "description": "Named entity: lesseqqgtr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a8b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lessgtr", 
+            "description": "Bad named entity: lessgtr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lessgtr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lessgtr;", 
+            "description": "Named entity: lessgtr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2276"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesssim", 
+            "description": "Bad named entity: lesssim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lesssim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lesssim;", 
+            "description": "Named entity: lesssim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2272"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lfisht", 
+            "description": "Bad named entity: lfisht without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lfisht"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lfisht;", 
+            "description": "Named entity: lfisht; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u297c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lfloor", 
+            "description": "Bad named entity: lfloor without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lfloor"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lfloor;", 
+            "description": "Named entity: lfloor; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u230a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lfr", 
+            "description": "Bad named entity: lfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lfr;", 
+            "description": "Named entity: lfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd29"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lg", 
+            "description": "Bad named entity: lg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lg;", 
+            "description": "Named entity: lg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2276"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lgE", 
+            "description": "Bad named entity: lgE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lgE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lgE;", 
+            "description": "Named entity: lgE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a91"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lhard", 
+            "description": "Bad named entity: lhard without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lhard"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lhard;", 
+            "description": "Named entity: lhard; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lharu", 
+            "description": "Bad named entity: lharu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lharu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lharu;", 
+            "description": "Named entity: lharu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lharul", 
+            "description": "Bad named entity: lharul without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lharul"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lharul;", 
+            "description": "Named entity: lharul; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u296a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lhblk", 
+            "description": "Bad named entity: lhblk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lhblk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lhblk;", 
+            "description": "Named entity: lhblk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2584"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ljcy", 
+            "description": "Bad named entity: ljcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ljcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ljcy;", 
+            "description": "Named entity: ljcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0459"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ll", 
+            "description": "Bad named entity: ll without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ll"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ll;", 
+            "description": "Named entity: ll; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&llarr", 
+            "description": "Bad named entity: llarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&llarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&llarr;", 
+            "description": "Named entity: llarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&llcorner", 
+            "description": "Bad named entity: llcorner without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&llcorner"
+                ]
+            ]
+        }, 
+        {
+            "input": "&llcorner;", 
+            "description": "Named entity: llcorner; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u231e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&llhard", 
+            "description": "Bad named entity: llhard without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&llhard"
+                ]
+            ]
+        }, 
+        {
+            "input": "&llhard;", 
+            "description": "Named entity: llhard; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u296b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lltri", 
+            "description": "Bad named entity: lltri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lltri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lltri;", 
+            "description": "Named entity: lltri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25fa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lmidot", 
+            "description": "Bad named entity: lmidot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lmidot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lmidot;", 
+            "description": "Named entity: lmidot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0140"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lmoust", 
+            "description": "Bad named entity: lmoust without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lmoust"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lmoust;", 
+            "description": "Named entity: lmoust; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23b0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lmoustache", 
+            "description": "Bad named entity: lmoustache without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lmoustache"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lmoustache;", 
+            "description": "Named entity: lmoustache; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23b0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lnE", 
+            "description": "Bad named entity: lnE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lnE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lnE;", 
+            "description": "Named entity: lnE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2268"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lnap", 
+            "description": "Bad named entity: lnap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lnap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lnap;", 
+            "description": "Named entity: lnap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a89"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lnapprox", 
+            "description": "Bad named entity: lnapprox without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lnapprox"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lnapprox;", 
+            "description": "Named entity: lnapprox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a89"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lne", 
+            "description": "Bad named entity: lne without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lne"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lne;", 
+            "description": "Named entity: lne; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a87"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lneq", 
+            "description": "Bad named entity: lneq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lneq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lneq;", 
+            "description": "Named entity: lneq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a87"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lneqq", 
+            "description": "Bad named entity: lneqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lneqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lneqq;", 
+            "description": "Named entity: lneqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2268"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lnsim", 
+            "description": "Bad named entity: lnsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lnsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lnsim;", 
+            "description": "Named entity: lnsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&loang", 
+            "description": "Bad named entity: loang without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&loang"
+                ]
+            ]
+        }, 
+        {
+            "input": "&loang;", 
+            "description": "Named entity: loang; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27ec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&loarr", 
+            "description": "Bad named entity: loarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&loarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&loarr;", 
+            "description": "Named entity: loarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21fd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lobrk", 
+            "description": "Bad named entity: lobrk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lobrk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lobrk;", 
+            "description": "Named entity: lobrk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27e6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&longleftarrow", 
+            "description": "Bad named entity: longleftarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&longleftarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&longleftarrow;", 
+            "description": "Named entity: longleftarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&longleftrightarrow", 
+            "description": "Bad named entity: longleftrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&longleftrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&longleftrightarrow;", 
+            "description": "Named entity: longleftrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&longmapsto", 
+            "description": "Bad named entity: longmapsto without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&longmapsto"
+                ]
+            ]
+        }, 
+        {
+            "input": "&longmapsto;", 
+            "description": "Named entity: longmapsto; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27fc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&longrightarrow", 
+            "description": "Bad named entity: longrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&longrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&longrightarrow;", 
+            "description": "Named entity: longrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&looparrowleft", 
+            "description": "Bad named entity: looparrowleft without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&looparrowleft"
+                ]
+            ]
+        }, 
+        {
+            "input": "&looparrowleft;", 
+            "description": "Named entity: looparrowleft; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ab"
+                ]
+            ]
+        }, 
+        {
+            "input": "&looparrowright", 
+            "description": "Bad named entity: looparrowright without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&looparrowright"
+                ]
+            ]
+        }, 
+        {
+            "input": "&looparrowright;", 
+            "description": "Named entity: looparrowright; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lopar", 
+            "description": "Bad named entity: lopar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lopar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lopar;", 
+            "description": "Named entity: lopar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2985"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lopf", 
+            "description": "Bad named entity: lopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lopf;", 
+            "description": "Named entity: lopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd5d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&loplus", 
+            "description": "Bad named entity: loplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&loplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&loplus;", 
+            "description": "Named entity: loplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a2d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lotimes", 
+            "description": "Bad named entity: lotimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lotimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lotimes;", 
+            "description": "Named entity: lotimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a34"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lowast", 
+            "description": "Bad named entity: lowast without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lowast"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lowast;", 
+            "description": "Named entity: lowast; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2217"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lowbar", 
+            "description": "Bad named entity: lowbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lowbar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lowbar;", 
+            "description": "Named entity: lowbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "_"
+                ]
+            ]
+        }, 
+        {
+            "input": "&loz", 
+            "description": "Bad named entity: loz without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&loz"
+                ]
+            ]
+        }, 
+        {
+            "input": "&loz;", 
+            "description": "Named entity: loz; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25ca"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lozenge", 
+            "description": "Bad named entity: lozenge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lozenge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lozenge;", 
+            "description": "Named entity: lozenge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25ca"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lozf", 
+            "description": "Bad named entity: lozf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lozf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lozf;", 
+            "description": "Named entity: lozf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29eb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lpar", 
+            "description": "Bad named entity: lpar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lpar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lpar;", 
+            "description": "Named entity: lpar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "("
+                ]
+            ]
+        }, 
+        {
+            "input": "&lparlt", 
+            "description": "Bad named entity: lparlt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lparlt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lparlt;", 
+            "description": "Named entity: lparlt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2993"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrarr", 
+            "description": "Bad named entity: lrarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lrarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrarr;", 
+            "description": "Named entity: lrarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrcorner", 
+            "description": "Bad named entity: lrcorner without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lrcorner"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrcorner;", 
+            "description": "Named entity: lrcorner; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u231f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrhar", 
+            "description": "Bad named entity: lrhar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lrhar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrhar;", 
+            "description": "Named entity: lrhar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21cb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrhard", 
+            "description": "Bad named entity: lrhard without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lrhard"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrhard;", 
+            "description": "Named entity: lrhard; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u296d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrm", 
+            "description": "Bad named entity: lrm without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lrm"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrm;", 
+            "description": "Named entity: lrm; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrtri", 
+            "description": "Bad named entity: lrtri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lrtri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lrtri;", 
+            "description": "Named entity: lrtri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22bf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsaquo", 
+            "description": "Bad named entity: lsaquo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lsaquo"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsaquo;", 
+            "description": "Named entity: lsaquo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2039"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lscr", 
+            "description": "Bad named entity: lscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lscr;", 
+            "description": "Named entity: lscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcc1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsh", 
+            "description": "Bad named entity: lsh without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lsh"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsh;", 
+            "description": "Named entity: lsh; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsim", 
+            "description": "Bad named entity: lsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsim;", 
+            "description": "Named entity: lsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2272"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsime", 
+            "description": "Bad named entity: lsime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lsime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsime;", 
+            "description": "Named entity: lsime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a8d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsimg", 
+            "description": "Bad named entity: lsimg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lsimg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsimg;", 
+            "description": "Named entity: lsimg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a8f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsqb", 
+            "description": "Bad named entity: lsqb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lsqb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsqb;", 
+            "description": "Named entity: lsqb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "["
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsquo", 
+            "description": "Bad named entity: lsquo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lsquo"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsquo;", 
+            "description": "Named entity: lsquo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2018"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsquor", 
+            "description": "Bad named entity: lsquor without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lsquor"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lsquor;", 
+            "description": "Named entity: lsquor; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u201a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lstrok", 
+            "description": "Bad named entity: lstrok without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lstrok"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lstrok;", 
+            "description": "Named entity: lstrok; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0142"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lt", 
+            "description": "Named entity: lt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "<"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lt;", 
+            "description": "Named entity: lt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "<"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ltcc;", 
+            "description": "Named entity: ltcc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ltcir;", 
+            "description": "Named entity: ltcir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a79"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ltdot;", 
+            "description": "Named entity: ltdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lthree;", 
+            "description": "Named entity: lthree; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22cb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ltimes;", 
+            "description": "Named entity: ltimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ltlarr;", 
+            "description": "Named entity: ltlarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2976"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ltquest;", 
+            "description": "Named entity: ltquest; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ltrPar;", 
+            "description": "Named entity: ltrPar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2996"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ltri;", 
+            "description": "Named entity: ltri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ltrie;", 
+            "description": "Named entity: ltrie; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ltrif;", 
+            "description": "Named entity: ltrif; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lurdshar", 
+            "description": "Bad named entity: lurdshar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lurdshar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lurdshar;", 
+            "description": "Named entity: lurdshar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u294a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&luruhar", 
+            "description": "Bad named entity: luruhar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&luruhar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&luruhar;", 
+            "description": "Named entity: luruhar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2966"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lvertneqq", 
+            "description": "Bad named entity: lvertneqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lvertneqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lvertneqq;", 
+            "description": "Named entity: lvertneqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2268\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lvnE", 
+            "description": "Bad named entity: lvnE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&lvnE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&lvnE;", 
+            "description": "Named entity: lvnE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2268\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mDDot", 
+            "description": "Bad named entity: mDDot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mDDot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mDDot;", 
+            "description": "Named entity: mDDot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&macr", 
+            "description": "Named entity: macr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00af"
+                ]
+            ]
+        }, 
+        {
+            "input": "&macr;", 
+            "description": "Named entity: macr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00af"
+                ]
+            ]
+        }, 
+        {
+            "input": "&male", 
+            "description": "Bad named entity: male without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&male"
+                ]
+            ]
+        }, 
+        {
+            "input": "&male;", 
+            "description": "Named entity: male; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2642"
+                ]
+            ]
+        }, 
+        {
+            "input": "&malt", 
+            "description": "Bad named entity: malt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&malt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&malt;", 
+            "description": "Named entity: malt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2720"
+                ]
+            ]
+        }, 
+        {
+            "input": "&maltese", 
+            "description": "Bad named entity: maltese without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&maltese"
+                ]
+            ]
+        }, 
+        {
+            "input": "&maltese;", 
+            "description": "Named entity: maltese; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2720"
+                ]
+            ]
+        }, 
+        {
+            "input": "&map", 
+            "description": "Bad named entity: map without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&map"
+                ]
+            ]
+        }, 
+        {
+            "input": "&map;", 
+            "description": "Named entity: map; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mapsto", 
+            "description": "Bad named entity: mapsto without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mapsto"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mapsto;", 
+            "description": "Named entity: mapsto; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mapstodown", 
+            "description": "Bad named entity: mapstodown without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mapstodown"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mapstodown;", 
+            "description": "Named entity: mapstodown; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mapstoleft", 
+            "description": "Bad named entity: mapstoleft without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mapstoleft"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mapstoleft;", 
+            "description": "Named entity: mapstoleft; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mapstoup", 
+            "description": "Bad named entity: mapstoup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mapstoup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mapstoup;", 
+            "description": "Named entity: mapstoup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&marker", 
+            "description": "Bad named entity: marker without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&marker"
+                ]
+            ]
+        }, 
+        {
+            "input": "&marker;", 
+            "description": "Named entity: marker; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25ae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mcomma", 
+            "description": "Bad named entity: mcomma without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mcomma"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mcomma;", 
+            "description": "Named entity: mcomma; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a29"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mcy", 
+            "description": "Bad named entity: mcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mcy;", 
+            "description": "Named entity: mcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u043c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mdash", 
+            "description": "Bad named entity: mdash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mdash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mdash;", 
+            "description": "Named entity: mdash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2014"
+                ]
+            ]
+        }, 
+        {
+            "input": "&measuredangle", 
+            "description": "Bad named entity: measuredangle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&measuredangle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&measuredangle;", 
+            "description": "Named entity: measuredangle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2221"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mfr", 
+            "description": "Bad named entity: mfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mfr;", 
+            "description": "Named entity: mfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd2a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mho", 
+            "description": "Bad named entity: mho without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mho"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mho;", 
+            "description": "Named entity: mho; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2127"
+                ]
+            ]
+        }, 
+        {
+            "input": "&micro", 
+            "description": "Named entity: micro without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&micro;", 
+            "description": "Named entity: micro; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mid", 
+            "description": "Bad named entity: mid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mid;", 
+            "description": "Named entity: mid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2223"
+                ]
+            ]
+        }, 
+        {
+            "input": "&midast", 
+            "description": "Bad named entity: midast without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&midast"
+                ]
+            ]
+        }, 
+        {
+            "input": "&midast;", 
+            "description": "Named entity: midast; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "*"
+                ]
+            ]
+        }, 
+        {
+            "input": "&midcir", 
+            "description": "Bad named entity: midcir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&midcir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&midcir;", 
+            "description": "Named entity: midcir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2af0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&middot", 
+            "description": "Named entity: middot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00b7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&middot;", 
+            "description": "Named entity: middot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&minus", 
+            "description": "Bad named entity: minus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&minus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&minus;", 
+            "description": "Named entity: minus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2212"
+                ]
+            ]
+        }, 
+        {
+            "input": "&minusb", 
+            "description": "Bad named entity: minusb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&minusb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&minusb;", 
+            "description": "Named entity: minusb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u229f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&minusd", 
+            "description": "Bad named entity: minusd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&minusd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&minusd;", 
+            "description": "Named entity: minusd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2238"
+                ]
+            ]
+        }, 
+        {
+            "input": "&minusdu", 
+            "description": "Bad named entity: minusdu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&minusdu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&minusdu;", 
+            "description": "Named entity: minusdu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a2a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mlcp", 
+            "description": "Bad named entity: mlcp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mlcp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mlcp;", 
+            "description": "Named entity: mlcp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2adb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mldr", 
+            "description": "Bad named entity: mldr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mldr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mldr;", 
+            "description": "Named entity: mldr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2026"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mnplus", 
+            "description": "Bad named entity: mnplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mnplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mnplus;", 
+            "description": "Named entity: mnplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2213"
+                ]
+            ]
+        }, 
+        {
+            "input": "&models", 
+            "description": "Bad named entity: models without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&models"
+                ]
+            ]
+        }, 
+        {
+            "input": "&models;", 
+            "description": "Named entity: models; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mopf", 
+            "description": "Bad named entity: mopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mopf;", 
+            "description": "Named entity: mopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd5e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mp", 
+            "description": "Bad named entity: mp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mp;", 
+            "description": "Named entity: mp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2213"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mscr", 
+            "description": "Bad named entity: mscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mscr;", 
+            "description": "Named entity: mscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcc2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mstpos", 
+            "description": "Bad named entity: mstpos without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mstpos"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mstpos;", 
+            "description": "Named entity: mstpos; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mu", 
+            "description": "Bad named entity: mu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mu;", 
+            "description": "Named entity: mu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03bc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&multimap", 
+            "description": "Bad named entity: multimap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&multimap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&multimap;", 
+            "description": "Named entity: multimap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mumap", 
+            "description": "Bad named entity: mumap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&mumap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&mumap;", 
+            "description": "Named entity: mumap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nGg", 
+            "description": "Bad named entity: nGg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nGg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nGg;", 
+            "description": "Named entity: nGg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d9\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nGt", 
+            "description": "Bad named entity: nGt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nGt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nGt;", 
+            "description": "Named entity: nGt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226b\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nGtv", 
+            "description": "Bad named entity: nGtv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nGtv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nGtv;", 
+            "description": "Named entity: nGtv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226b\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nLeftarrow", 
+            "description": "Bad named entity: nLeftarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nLeftarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nLeftarrow;", 
+            "description": "Named entity: nLeftarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21cd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nLeftrightarrow", 
+            "description": "Bad named entity: nLeftrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nLeftrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nLeftrightarrow;", 
+            "description": "Named entity: nLeftrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ce"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nLl", 
+            "description": "Bad named entity: nLl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nLl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nLl;", 
+            "description": "Named entity: nLl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d8\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nLt", 
+            "description": "Bad named entity: nLt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nLt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nLt;", 
+            "description": "Named entity: nLt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226a\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nLtv", 
+            "description": "Bad named entity: nLtv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nLtv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nLtv;", 
+            "description": "Named entity: nLtv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226a\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nRightarrow", 
+            "description": "Bad named entity: nRightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nRightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nRightarrow;", 
+            "description": "Named entity: nRightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21cf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nVDash", 
+            "description": "Bad named entity: nVDash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nVDash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nVDash;", 
+            "description": "Named entity: nVDash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22af"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nVdash", 
+            "description": "Bad named entity: nVdash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nVdash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nVdash;", 
+            "description": "Named entity: nVdash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nabla", 
+            "description": "Bad named entity: nabla without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nabla"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nabla;", 
+            "description": "Named entity: nabla; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2207"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nacute", 
+            "description": "Bad named entity: nacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nacute;", 
+            "description": "Named entity: nacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0144"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nang", 
+            "description": "Bad named entity: nang without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nang"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nang;", 
+            "description": "Named entity: nang; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2220\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nap", 
+            "description": "Bad named entity: nap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nap;", 
+            "description": "Named entity: nap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2249"
+                ]
+            ]
+        }, 
+        {
+            "input": "&napE", 
+            "description": "Bad named entity: napE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&napE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&napE;", 
+            "description": "Named entity: napE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a70\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&napid", 
+            "description": "Bad named entity: napid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&napid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&napid;", 
+            "description": "Named entity: napid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224b\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&napos", 
+            "description": "Bad named entity: napos without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&napos"
+                ]
+            ]
+        }, 
+        {
+            "input": "&napos;", 
+            "description": "Named entity: napos; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0149"
+                ]
+            ]
+        }, 
+        {
+            "input": "&napprox", 
+            "description": "Bad named entity: napprox without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&napprox"
+                ]
+            ]
+        }, 
+        {
+            "input": "&napprox;", 
+            "description": "Named entity: napprox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2249"
+                ]
+            ]
+        }, 
+        {
+            "input": "&natur", 
+            "description": "Bad named entity: natur without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&natur"
+                ]
+            ]
+        }, 
+        {
+            "input": "&natur;", 
+            "description": "Named entity: natur; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u266e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&natural", 
+            "description": "Bad named entity: natural without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&natural"
+                ]
+            ]
+        }, 
+        {
+            "input": "&natural;", 
+            "description": "Named entity: natural; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u266e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&naturals", 
+            "description": "Bad named entity: naturals without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&naturals"
+                ]
+            ]
+        }, 
+        {
+            "input": "&naturals;", 
+            "description": "Named entity: naturals; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2115"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nbsp", 
+            "description": "Named entity: nbsp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nbsp;", 
+            "description": "Named entity: nbsp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nbump", 
+            "description": "Bad named entity: nbump without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nbump"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nbump;", 
+            "description": "Named entity: nbump; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224e\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nbumpe", 
+            "description": "Bad named entity: nbumpe without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nbumpe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nbumpe;", 
+            "description": "Named entity: nbumpe; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224f\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncap", 
+            "description": "Bad named entity: ncap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ncap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncap;", 
+            "description": "Named entity: ncap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a43"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncaron", 
+            "description": "Bad named entity: ncaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ncaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncaron;", 
+            "description": "Named entity: ncaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0148"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncedil", 
+            "description": "Bad named entity: ncedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ncedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncedil;", 
+            "description": "Named entity: ncedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0146"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncong", 
+            "description": "Bad named entity: ncong without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ncong"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncong;", 
+            "description": "Named entity: ncong; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2247"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncongdot", 
+            "description": "Bad named entity: ncongdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ncongdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncongdot;", 
+            "description": "Named entity: ncongdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a6d\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncup", 
+            "description": "Bad named entity: ncup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ncup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncup;", 
+            "description": "Named entity: ncup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a42"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncy", 
+            "description": "Bad named entity: ncy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ncy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ncy;", 
+            "description": "Named entity: ncy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u043d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ndash", 
+            "description": "Bad named entity: ndash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ndash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ndash;", 
+            "description": "Named entity: ndash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2013"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ne", 
+            "description": "Bad named entity: ne without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ne"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ne;", 
+            "description": "Named entity: ne; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2260"
+                ]
+            ]
+        }, 
+        {
+            "input": "&neArr", 
+            "description": "Bad named entity: neArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&neArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&neArr;", 
+            "description": "Named entity: neArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nearhk", 
+            "description": "Bad named entity: nearhk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nearhk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nearhk;", 
+            "description": "Named entity: nearhk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2924"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nearr", 
+            "description": "Bad named entity: nearr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nearr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nearr;", 
+            "description": "Named entity: nearr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2197"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nearrow", 
+            "description": "Bad named entity: nearrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nearrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nearrow;", 
+            "description": "Named entity: nearrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2197"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nedot", 
+            "description": "Bad named entity: nedot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nedot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nedot;", 
+            "description": "Named entity: nedot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2250\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nequiv", 
+            "description": "Bad named entity: nequiv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nequiv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nequiv;", 
+            "description": "Named entity: nequiv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2262"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nesear", 
+            "description": "Bad named entity: nesear without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nesear"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nesear;", 
+            "description": "Named entity: nesear; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2928"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nesim", 
+            "description": "Bad named entity: nesim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nesim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nesim;", 
+            "description": "Named entity: nesim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2242\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nexist", 
+            "description": "Bad named entity: nexist without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nexist"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nexist;", 
+            "description": "Named entity: nexist; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2204"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nexists", 
+            "description": "Bad named entity: nexists without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nexists"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nexists;", 
+            "description": "Named entity: nexists; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2204"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nfr", 
+            "description": "Bad named entity: nfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nfr;", 
+            "description": "Named entity: nfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd2b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngE", 
+            "description": "Bad named entity: ngE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ngE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngE;", 
+            "description": "Named entity: ngE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2267\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nge", 
+            "description": "Bad named entity: nge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nge;", 
+            "description": "Named entity: nge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2271"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngeq", 
+            "description": "Bad named entity: ngeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ngeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngeq;", 
+            "description": "Named entity: ngeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2271"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngeqq", 
+            "description": "Bad named entity: ngeqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ngeqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngeqq;", 
+            "description": "Named entity: ngeqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2267\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngeqslant", 
+            "description": "Bad named entity: ngeqslant without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ngeqslant"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngeqslant;", 
+            "description": "Named entity: ngeqslant; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7e\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nges", 
+            "description": "Bad named entity: nges without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nges"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nges;", 
+            "description": "Named entity: nges; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7e\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngsim", 
+            "description": "Bad named entity: ngsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ngsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngsim;", 
+            "description": "Named entity: ngsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2275"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngt", 
+            "description": "Bad named entity: ngt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ngt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngt;", 
+            "description": "Named entity: ngt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngtr", 
+            "description": "Bad named entity: ngtr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ngtr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ngtr;", 
+            "description": "Named entity: ngtr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nhArr", 
+            "description": "Bad named entity: nhArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nhArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nhArr;", 
+            "description": "Named entity: nhArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ce"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nharr", 
+            "description": "Bad named entity: nharr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nharr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nharr;", 
+            "description": "Named entity: nharr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nhpar", 
+            "description": "Bad named entity: nhpar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nhpar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nhpar;", 
+            "description": "Named entity: nhpar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2af2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ni", 
+            "description": "Bad named entity: ni without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ni"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ni;", 
+            "description": "Named entity: ni; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u220b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nis", 
+            "description": "Bad named entity: nis without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nis"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nis;", 
+            "description": "Named entity: nis; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22fc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nisd", 
+            "description": "Bad named entity: nisd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nisd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nisd;", 
+            "description": "Named entity: nisd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22fa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&niv", 
+            "description": "Bad named entity: niv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&niv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&niv;", 
+            "description": "Named entity: niv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u220b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&njcy", 
+            "description": "Bad named entity: njcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&njcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&njcy;", 
+            "description": "Named entity: njcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u045a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nlArr", 
+            "description": "Bad named entity: nlArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nlArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nlArr;", 
+            "description": "Named entity: nlArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21cd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nlE", 
+            "description": "Bad named entity: nlE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nlE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nlE;", 
+            "description": "Named entity: nlE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2266\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nlarr", 
+            "description": "Bad named entity: nlarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nlarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nlarr;", 
+            "description": "Named entity: nlarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u219a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nldr", 
+            "description": "Bad named entity: nldr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nldr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nldr;", 
+            "description": "Named entity: nldr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2025"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nle", 
+            "description": "Bad named entity: nle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nle;", 
+            "description": "Named entity: nle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2270"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nleftarrow", 
+            "description": "Bad named entity: nleftarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nleftarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nleftarrow;", 
+            "description": "Named entity: nleftarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u219a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nleftrightarrow", 
+            "description": "Bad named entity: nleftrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nleftrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nleftrightarrow;", 
+            "description": "Named entity: nleftrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nleq", 
+            "description": "Bad named entity: nleq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nleq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nleq;", 
+            "description": "Named entity: nleq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2270"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nleqq", 
+            "description": "Bad named entity: nleqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nleqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nleqq;", 
+            "description": "Named entity: nleqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2266\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nleqslant", 
+            "description": "Bad named entity: nleqslant without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nleqslant"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nleqslant;", 
+            "description": "Named entity: nleqslant; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7d\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nles", 
+            "description": "Bad named entity: nles without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nles"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nles;", 
+            "description": "Named entity: nles; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a7d\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nless", 
+            "description": "Bad named entity: nless without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nless"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nless;", 
+            "description": "Named entity: nless; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nlsim", 
+            "description": "Bad named entity: nlsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nlsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nlsim;", 
+            "description": "Named entity: nlsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2274"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nlt", 
+            "description": "Bad named entity: nlt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nlt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nlt;", 
+            "description": "Named entity: nlt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nltri", 
+            "description": "Bad named entity: nltri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nltri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nltri;", 
+            "description": "Named entity: nltri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ea"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nltrie", 
+            "description": "Bad named entity: nltrie without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nltrie"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nltrie;", 
+            "description": "Named entity: nltrie; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nmid", 
+            "description": "Bad named entity: nmid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nmid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nmid;", 
+            "description": "Named entity: nmid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2224"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nopf", 
+            "description": "Bad named entity: nopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nopf;", 
+            "description": "Named entity: nopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd5f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&not", 
+            "description": "Named entity: not without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&not;", 
+            "description": "Named entity: not; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&notin;", 
+            "description": "Named entity: notin; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2209"
+                ]
+            ]
+        }, 
+        {
+            "input": "&notinE;", 
+            "description": "Named entity: notinE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f9\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&notindot;", 
+            "description": "Named entity: notindot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f5\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&notinva;", 
+            "description": "Named entity: notinva; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2209"
+                ]
+            ]
+        }, 
+        {
+            "input": "&notinvb;", 
+            "description": "Named entity: notinvb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&notinvc;", 
+            "description": "Named entity: notinvc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&notni;", 
+            "description": "Named entity: notni; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u220c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&notniva;", 
+            "description": "Named entity: notniva; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u220c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&notnivb;", 
+            "description": "Named entity: notnivb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22fe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&notnivc;", 
+            "description": "Named entity: notnivc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22fd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npar", 
+            "description": "Bad named entity: npar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&npar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npar;", 
+            "description": "Named entity: npar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2226"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nparallel", 
+            "description": "Bad named entity: nparallel without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nparallel"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nparallel;", 
+            "description": "Named entity: nparallel; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2226"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nparsl", 
+            "description": "Bad named entity: nparsl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nparsl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nparsl;", 
+            "description": "Named entity: nparsl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2afd\u20e5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npart", 
+            "description": "Bad named entity: npart without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&npart"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npart;", 
+            "description": "Named entity: npart; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2202\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npolint", 
+            "description": "Bad named entity: npolint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&npolint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npolint;", 
+            "description": "Named entity: npolint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a14"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npr", 
+            "description": "Bad named entity: npr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&npr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npr;", 
+            "description": "Named entity: npr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2280"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nprcue", 
+            "description": "Bad named entity: nprcue without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nprcue"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nprcue;", 
+            "description": "Named entity: nprcue; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npre", 
+            "description": "Bad named entity: npre without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&npre"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npre;", 
+            "description": "Named entity: npre; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aaf\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nprec", 
+            "description": "Bad named entity: nprec without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nprec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nprec;", 
+            "description": "Named entity: nprec; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2280"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npreceq", 
+            "description": "Bad named entity: npreceq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&npreceq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&npreceq;", 
+            "description": "Named entity: npreceq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aaf\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrArr", 
+            "description": "Bad named entity: nrArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nrArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrArr;", 
+            "description": "Named entity: nrArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21cf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrarr", 
+            "description": "Bad named entity: nrarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nrarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrarr;", 
+            "description": "Named entity: nrarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u219b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrarrc", 
+            "description": "Bad named entity: nrarrc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nrarrc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrarrc;", 
+            "description": "Named entity: nrarrc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2933\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrarrw", 
+            "description": "Bad named entity: nrarrw without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nrarrw"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrarrw;", 
+            "description": "Named entity: nrarrw; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u219d\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrightarrow", 
+            "description": "Bad named entity: nrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrightarrow;", 
+            "description": "Named entity: nrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u219b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrtri", 
+            "description": "Bad named entity: nrtri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nrtri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrtri;", 
+            "description": "Named entity: nrtri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22eb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrtrie", 
+            "description": "Bad named entity: nrtrie without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nrtrie"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nrtrie;", 
+            "description": "Named entity: nrtrie; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsc", 
+            "description": "Bad named entity: nsc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsc;", 
+            "description": "Named entity: nsc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2281"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsccue", 
+            "description": "Bad named entity: nsccue without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsccue"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsccue;", 
+            "description": "Named entity: nsccue; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsce", 
+            "description": "Bad named entity: nsce without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsce"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsce;", 
+            "description": "Named entity: nsce; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab0\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nscr", 
+            "description": "Bad named entity: nscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nscr;", 
+            "description": "Named entity: nscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcc3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nshortmid", 
+            "description": "Bad named entity: nshortmid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nshortmid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nshortmid;", 
+            "description": "Named entity: nshortmid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2224"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nshortparallel", 
+            "description": "Bad named entity: nshortparallel without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nshortparallel"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nshortparallel;", 
+            "description": "Named entity: nshortparallel; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2226"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsim", 
+            "description": "Bad named entity: nsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsim;", 
+            "description": "Named entity: nsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2241"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsime", 
+            "description": "Bad named entity: nsime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsime;", 
+            "description": "Named entity: nsime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2244"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsimeq", 
+            "description": "Bad named entity: nsimeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsimeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsimeq;", 
+            "description": "Named entity: nsimeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2244"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsmid", 
+            "description": "Bad named entity: nsmid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsmid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsmid;", 
+            "description": "Named entity: nsmid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2224"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nspar", 
+            "description": "Bad named entity: nspar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nspar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nspar;", 
+            "description": "Named entity: nspar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2226"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsqsube", 
+            "description": "Bad named entity: nsqsube without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsqsube"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsqsube;", 
+            "description": "Named entity: nsqsube; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsqsupe", 
+            "description": "Bad named entity: nsqsupe without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsqsupe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsqsupe;", 
+            "description": "Named entity: nsqsupe; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsub", 
+            "description": "Bad named entity: nsub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsub;", 
+            "description": "Named entity: nsub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2284"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsubE", 
+            "description": "Bad named entity: nsubE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsubE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsubE;", 
+            "description": "Named entity: nsubE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac5\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsube", 
+            "description": "Bad named entity: nsube without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsube"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsube;", 
+            "description": "Named entity: nsube; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2288"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsubset", 
+            "description": "Bad named entity: nsubset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsubset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsubset;", 
+            "description": "Named entity: nsubset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2282\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsubseteq", 
+            "description": "Bad named entity: nsubseteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsubseteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsubseteq;", 
+            "description": "Named entity: nsubseteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2288"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsubseteqq", 
+            "description": "Bad named entity: nsubseteqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsubseteqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsubseteqq;", 
+            "description": "Named entity: nsubseteqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac5\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsucc", 
+            "description": "Bad named entity: nsucc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsucc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsucc;", 
+            "description": "Named entity: nsucc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2281"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsucceq", 
+            "description": "Bad named entity: nsucceq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsucceq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsucceq;", 
+            "description": "Named entity: nsucceq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab0\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsup", 
+            "description": "Bad named entity: nsup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsup;", 
+            "description": "Named entity: nsup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2285"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsupE", 
+            "description": "Bad named entity: nsupE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsupE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsupE;", 
+            "description": "Named entity: nsupE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac6\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsupe", 
+            "description": "Bad named entity: nsupe without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsupe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsupe;", 
+            "description": "Named entity: nsupe; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2289"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsupset", 
+            "description": "Bad named entity: nsupset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsupset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsupset;", 
+            "description": "Named entity: nsupset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2283\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsupseteq", 
+            "description": "Bad named entity: nsupseteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsupseteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsupseteq;", 
+            "description": "Named entity: nsupseteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2289"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsupseteqq", 
+            "description": "Bad named entity: nsupseteqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nsupseteqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nsupseteqq;", 
+            "description": "Named entity: nsupseteqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac6\u0338"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntgl", 
+            "description": "Bad named entity: ntgl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ntgl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntgl;", 
+            "description": "Named entity: ntgl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2279"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntilde", 
+            "description": "Named entity: ntilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00f1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntilde;", 
+            "description": "Named entity: ntilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntlg", 
+            "description": "Bad named entity: ntlg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ntlg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntlg;", 
+            "description": "Named entity: ntlg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2278"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntriangleleft", 
+            "description": "Bad named entity: ntriangleleft without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ntriangleleft"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntriangleleft;", 
+            "description": "Named entity: ntriangleleft; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ea"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntrianglelefteq", 
+            "description": "Bad named entity: ntrianglelefteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ntrianglelefteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntrianglelefteq;", 
+            "description": "Named entity: ntrianglelefteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntriangleright", 
+            "description": "Bad named entity: ntriangleright without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ntriangleright"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntriangleright;", 
+            "description": "Named entity: ntriangleright; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22eb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntrianglerighteq", 
+            "description": "Bad named entity: ntrianglerighteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ntrianglerighteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ntrianglerighteq;", 
+            "description": "Named entity: ntrianglerighteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nu", 
+            "description": "Bad named entity: nu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nu;", 
+            "description": "Named entity: nu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03bd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&num", 
+            "description": "Bad named entity: num without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&num"
+                ]
+            ]
+        }, 
+        {
+            "input": "&num;", 
+            "description": "Named entity: num; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "#"
+                ]
+            ]
+        }, 
+        {
+            "input": "&numero", 
+            "description": "Bad named entity: numero without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&numero"
+                ]
+            ]
+        }, 
+        {
+            "input": "&numero;", 
+            "description": "Named entity: numero; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2116"
+                ]
+            ]
+        }, 
+        {
+            "input": "&numsp", 
+            "description": "Bad named entity: numsp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&numsp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&numsp;", 
+            "description": "Named entity: numsp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2007"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvDash", 
+            "description": "Bad named entity: nvDash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvDash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvDash;", 
+            "description": "Named entity: nvDash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvHarr", 
+            "description": "Bad named entity: nvHarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvHarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvHarr;", 
+            "description": "Named entity: nvHarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2904"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvap", 
+            "description": "Bad named entity: nvap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvap;", 
+            "description": "Named entity: nvap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u224d\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvdash", 
+            "description": "Bad named entity: nvdash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvdash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvdash;", 
+            "description": "Named entity: nvdash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvge", 
+            "description": "Bad named entity: nvge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvge;", 
+            "description": "Named entity: nvge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2265\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvgt", 
+            "description": "Bad named entity: nvgt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvgt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvgt;", 
+            "description": "Named entity: nvgt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    ">\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvinfin", 
+            "description": "Bad named entity: nvinfin without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvinfin"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvinfin;", 
+            "description": "Named entity: nvinfin; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29de"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvlArr", 
+            "description": "Bad named entity: nvlArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvlArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvlArr;", 
+            "description": "Named entity: nvlArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2902"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvle", 
+            "description": "Bad named entity: nvle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvle;", 
+            "description": "Named entity: nvle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2264\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvlt", 
+            "description": "Bad named entity: nvlt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvlt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvlt;", 
+            "description": "Named entity: nvlt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "<\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvltrie", 
+            "description": "Bad named entity: nvltrie without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvltrie"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvltrie;", 
+            "description": "Named entity: nvltrie; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b4\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvrArr", 
+            "description": "Bad named entity: nvrArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvrArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvrArr;", 
+            "description": "Named entity: nvrArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2903"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvrtrie", 
+            "description": "Bad named entity: nvrtrie without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvrtrie"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvrtrie;", 
+            "description": "Named entity: nvrtrie; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b5\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvsim", 
+            "description": "Bad named entity: nvsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nvsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nvsim;", 
+            "description": "Named entity: nvsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223c\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nwArr", 
+            "description": "Bad named entity: nwArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nwArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nwArr;", 
+            "description": "Named entity: nwArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nwarhk", 
+            "description": "Bad named entity: nwarhk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nwarhk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nwarhk;", 
+            "description": "Named entity: nwarhk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2923"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nwarr", 
+            "description": "Bad named entity: nwarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nwarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nwarr;", 
+            "description": "Named entity: nwarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2196"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nwarrow", 
+            "description": "Bad named entity: nwarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nwarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nwarrow;", 
+            "description": "Named entity: nwarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2196"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nwnear", 
+            "description": "Bad named entity: nwnear without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&nwnear"
+                ]
+            ]
+        }, 
+        {
+            "input": "&nwnear;", 
+            "description": "Named entity: nwnear; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2927"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oS", 
+            "description": "Bad named entity: oS without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&oS"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oS;", 
+            "description": "Named entity: oS; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u24c8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oacute", 
+            "description": "Named entity: oacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00f3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oacute;", 
+            "description": "Named entity: oacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oast", 
+            "description": "Bad named entity: oast without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&oast"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oast;", 
+            "description": "Named entity: oast; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u229b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ocir", 
+            "description": "Bad named entity: ocir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ocir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ocir;", 
+            "description": "Named entity: ocir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u229a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ocirc", 
+            "description": "Named entity: ocirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00f4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ocirc;", 
+            "description": "Named entity: ocirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ocy", 
+            "description": "Bad named entity: ocy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ocy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ocy;", 
+            "description": "Named entity: ocy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u043e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&odash", 
+            "description": "Bad named entity: odash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&odash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&odash;", 
+            "description": "Named entity: odash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u229d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&odblac", 
+            "description": "Bad named entity: odblac without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&odblac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&odblac;", 
+            "description": "Named entity: odblac; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0151"
+                ]
+            ]
+        }, 
+        {
+            "input": "&odiv", 
+            "description": "Bad named entity: odiv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&odiv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&odiv;", 
+            "description": "Named entity: odiv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a38"
+                ]
+            ]
+        }, 
+        {
+            "input": "&odot", 
+            "description": "Bad named entity: odot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&odot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&odot;", 
+            "description": "Named entity: odot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2299"
+                ]
+            ]
+        }, 
+        {
+            "input": "&odsold", 
+            "description": "Bad named entity: odsold without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&odsold"
+                ]
+            ]
+        }, 
+        {
+            "input": "&odsold;", 
+            "description": "Named entity: odsold; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29bc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oelig", 
+            "description": "Bad named entity: oelig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&oelig"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oelig;", 
+            "description": "Named entity: oelig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0153"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ofcir", 
+            "description": "Bad named entity: ofcir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ofcir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ofcir;", 
+            "description": "Named entity: ofcir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29bf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ofr", 
+            "description": "Bad named entity: ofr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ofr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ofr;", 
+            "description": "Named entity: ofr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd2c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ogon", 
+            "description": "Bad named entity: ogon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ogon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ogon;", 
+            "description": "Named entity: ogon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02db"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ograve", 
+            "description": "Named entity: ograve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00f2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ograve;", 
+            "description": "Named entity: ograve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ogt", 
+            "description": "Bad named entity: ogt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ogt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ogt;", 
+            "description": "Named entity: ogt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29c1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ohbar", 
+            "description": "Bad named entity: ohbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ohbar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ohbar;", 
+            "description": "Named entity: ohbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ohm", 
+            "description": "Bad named entity: ohm without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ohm"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ohm;", 
+            "description": "Named entity: ohm; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03a9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oint", 
+            "description": "Bad named entity: oint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&oint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oint;", 
+            "description": "Named entity: oint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&olarr", 
+            "description": "Bad named entity: olarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&olarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&olarr;", 
+            "description": "Named entity: olarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ba"
+                ]
+            ]
+        }, 
+        {
+            "input": "&olcir", 
+            "description": "Bad named entity: olcir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&olcir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&olcir;", 
+            "description": "Named entity: olcir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29be"
+                ]
+            ]
+        }, 
+        {
+            "input": "&olcross", 
+            "description": "Bad named entity: olcross without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&olcross"
+                ]
+            ]
+        }, 
+        {
+            "input": "&olcross;", 
+            "description": "Named entity: olcross; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29bb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oline", 
+            "description": "Bad named entity: oline without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&oline"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oline;", 
+            "description": "Named entity: oline; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u203e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&olt", 
+            "description": "Bad named entity: olt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&olt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&olt;", 
+            "description": "Named entity: olt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29c0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&omacr", 
+            "description": "Bad named entity: omacr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&omacr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&omacr;", 
+            "description": "Named entity: omacr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u014d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&omega", 
+            "description": "Bad named entity: omega without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&omega"
+                ]
+            ]
+        }, 
+        {
+            "input": "&omega;", 
+            "description": "Named entity: omega; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&omicron", 
+            "description": "Bad named entity: omicron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&omicron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&omicron;", 
+            "description": "Named entity: omicron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03bf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&omid", 
+            "description": "Bad named entity: omid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&omid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&omid;", 
+            "description": "Named entity: omid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29b6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ominus", 
+            "description": "Bad named entity: ominus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ominus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ominus;", 
+            "description": "Named entity: ominus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2296"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oopf", 
+            "description": "Bad named entity: oopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&oopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oopf;", 
+            "description": "Named entity: oopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd60"
+                ]
+            ]
+        }, 
+        {
+            "input": "&opar", 
+            "description": "Bad named entity: opar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&opar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&opar;", 
+            "description": "Named entity: opar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29b7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&operp", 
+            "description": "Bad named entity: operp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&operp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&operp;", 
+            "description": "Named entity: operp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29b9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oplus", 
+            "description": "Bad named entity: oplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&oplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oplus;", 
+            "description": "Named entity: oplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2295"
+                ]
+            ]
+        }, 
+        {
+            "input": "&or", 
+            "description": "Bad named entity: or without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&or"
+                ]
+            ]
+        }, 
+        {
+            "input": "&or;", 
+            "description": "Named entity: or; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2228"
+                ]
+            ]
+        }, 
+        {
+            "input": "&orarr", 
+            "description": "Bad named entity: orarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&orarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&orarr;", 
+            "description": "Named entity: orarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ord", 
+            "description": "Bad named entity: ord without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ord"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ord;", 
+            "description": "Named entity: ord; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a5d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&order", 
+            "description": "Bad named entity: order without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&order"
+                ]
+            ]
+        }, 
+        {
+            "input": "&order;", 
+            "description": "Named entity: order; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2134"
+                ]
+            ]
+        }, 
+        {
+            "input": "&orderof", 
+            "description": "Bad named entity: orderof without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&orderof"
+                ]
+            ]
+        }, 
+        {
+            "input": "&orderof;", 
+            "description": "Named entity: orderof; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2134"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ordf", 
+            "description": "Named entity: ordf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00aa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ordf;", 
+            "description": "Named entity: ordf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00aa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ordm", 
+            "description": "Named entity: ordm without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ba"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ordm;", 
+            "description": "Named entity: ordm; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ba"
+                ]
+            ]
+        }, 
+        {
+            "input": "&origof", 
+            "description": "Bad named entity: origof without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&origof"
+                ]
+            ]
+        }, 
+        {
+            "input": "&origof;", 
+            "description": "Named entity: origof; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oror", 
+            "description": "Bad named entity: oror without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&oror"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oror;", 
+            "description": "Named entity: oror; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a56"
+                ]
+            ]
+        }, 
+        {
+            "input": "&orslope", 
+            "description": "Bad named entity: orslope without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&orslope"
+                ]
+            ]
+        }, 
+        {
+            "input": "&orslope;", 
+            "description": "Named entity: orslope; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a57"
+                ]
+            ]
+        }, 
+        {
+            "input": "&orv", 
+            "description": "Bad named entity: orv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&orv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&orv;", 
+            "description": "Named entity: orv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a5b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oscr", 
+            "description": "Bad named entity: oscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&oscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oscr;", 
+            "description": "Named entity: oscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2134"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oslash", 
+            "description": "Named entity: oslash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00f8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&oslash;", 
+            "description": "Named entity: oslash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&osol", 
+            "description": "Bad named entity: osol without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&osol"
+                ]
+            ]
+        }, 
+        {
+            "input": "&osol;", 
+            "description": "Named entity: osol; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2298"
+                ]
+            ]
+        }, 
+        {
+            "input": "&otilde", 
+            "description": "Named entity: otilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&otilde;", 
+            "description": "Named entity: otilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&otimes", 
+            "description": "Bad named entity: otimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&otimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&otimes;", 
+            "description": "Named entity: otimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2297"
+                ]
+            ]
+        }, 
+        {
+            "input": "&otimesas", 
+            "description": "Bad named entity: otimesas without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&otimesas"
+                ]
+            ]
+        }, 
+        {
+            "input": "&otimesas;", 
+            "description": "Named entity: otimesas; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a36"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ouml", 
+            "description": "Named entity: ouml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00f6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ouml;", 
+            "description": "Named entity: ouml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ovbar", 
+            "description": "Bad named entity: ovbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ovbar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ovbar;", 
+            "description": "Named entity: ovbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u233d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&par", 
+            "description": "Bad named entity: par without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&par"
+                ]
+            ]
+        }, 
+        {
+            "input": "&par;", 
+            "description": "Named entity: par; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2225"
+                ]
+            ]
+        }, 
+        {
+            "input": "&para", 
+            "description": "Named entity: para without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00b6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&para;", 
+            "description": "Named entity: para; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&parallel;", 
+            "description": "Named entity: parallel; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2225"
+                ]
+            ]
+        }, 
+        {
+            "input": "&parsim", 
+            "description": "Bad named entity: parsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&parsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&parsim;", 
+            "description": "Named entity: parsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2af3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&parsl", 
+            "description": "Bad named entity: parsl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&parsl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&parsl;", 
+            "description": "Named entity: parsl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2afd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&part", 
+            "description": "Bad named entity: part without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&part"
+                ]
+            ]
+        }, 
+        {
+            "input": "&part;", 
+            "description": "Named entity: part; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2202"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pcy", 
+            "description": "Bad named entity: pcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pcy;", 
+            "description": "Named entity: pcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u043f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&percnt", 
+            "description": "Bad named entity: percnt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&percnt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&percnt;", 
+            "description": "Named entity: percnt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "%"
+                ]
+            ]
+        }, 
+        {
+            "input": "&period", 
+            "description": "Bad named entity: period without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&period"
+                ]
+            ]
+        }, 
+        {
+            "input": "&period;", 
+            "description": "Named entity: period; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "."
+                ]
+            ]
+        }, 
+        {
+            "input": "&permil", 
+            "description": "Bad named entity: permil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&permil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&permil;", 
+            "description": "Named entity: permil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2030"
+                ]
+            ]
+        }, 
+        {
+            "input": "&perp", 
+            "description": "Bad named entity: perp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&perp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&perp;", 
+            "description": "Named entity: perp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pertenk", 
+            "description": "Bad named entity: pertenk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pertenk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pertenk;", 
+            "description": "Named entity: pertenk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2031"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pfr", 
+            "description": "Bad named entity: pfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pfr;", 
+            "description": "Named entity: pfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd2d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&phi", 
+            "description": "Bad named entity: phi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&phi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&phi;", 
+            "description": "Named entity: phi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&phiv", 
+            "description": "Bad named entity: phiv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&phiv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&phiv;", 
+            "description": "Named entity: phiv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03d5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&phmmat", 
+            "description": "Bad named entity: phmmat without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&phmmat"
+                ]
+            ]
+        }, 
+        {
+            "input": "&phmmat;", 
+            "description": "Named entity: phmmat; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2133"
+                ]
+            ]
+        }, 
+        {
+            "input": "&phone", 
+            "description": "Bad named entity: phone without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&phone"
+                ]
+            ]
+        }, 
+        {
+            "input": "&phone;", 
+            "description": "Named entity: phone; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u260e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pi", 
+            "description": "Bad named entity: pi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pi;", 
+            "description": "Named entity: pi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pitchfork", 
+            "description": "Bad named entity: pitchfork without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pitchfork"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pitchfork;", 
+            "description": "Named entity: pitchfork; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22d4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&piv", 
+            "description": "Bad named entity: piv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&piv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&piv;", 
+            "description": "Named entity: piv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03d6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&planck", 
+            "description": "Bad named entity: planck without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&planck"
+                ]
+            ]
+        }, 
+        {
+            "input": "&planck;", 
+            "description": "Named entity: planck; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&planckh", 
+            "description": "Bad named entity: planckh without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&planckh"
+                ]
+            ]
+        }, 
+        {
+            "input": "&planckh;", 
+            "description": "Named entity: planckh; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plankv", 
+            "description": "Bad named entity: plankv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&plankv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plankv;", 
+            "description": "Named entity: plankv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plus", 
+            "description": "Bad named entity: plus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&plus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plus;", 
+            "description": "Named entity: plus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "+"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plusacir", 
+            "description": "Bad named entity: plusacir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&plusacir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plusacir;", 
+            "description": "Named entity: plusacir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a23"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plusb", 
+            "description": "Bad named entity: plusb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&plusb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plusb;", 
+            "description": "Named entity: plusb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u229e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pluscir", 
+            "description": "Bad named entity: pluscir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pluscir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pluscir;", 
+            "description": "Named entity: pluscir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a22"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plusdo", 
+            "description": "Bad named entity: plusdo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&plusdo"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plusdo;", 
+            "description": "Named entity: plusdo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2214"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plusdu", 
+            "description": "Bad named entity: plusdu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&plusdu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plusdu;", 
+            "description": "Named entity: plusdu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a25"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pluse", 
+            "description": "Bad named entity: pluse without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pluse"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pluse;", 
+            "description": "Named entity: pluse; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a72"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plusmn", 
+            "description": "Named entity: plusmn without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plusmn;", 
+            "description": "Named entity: plusmn; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plussim", 
+            "description": "Bad named entity: plussim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&plussim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plussim;", 
+            "description": "Named entity: plussim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a26"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plustwo", 
+            "description": "Bad named entity: plustwo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&plustwo"
+                ]
+            ]
+        }, 
+        {
+            "input": "&plustwo;", 
+            "description": "Named entity: plustwo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a27"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pm", 
+            "description": "Bad named entity: pm without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pm"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pm;", 
+            "description": "Named entity: pm; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pointint", 
+            "description": "Bad named entity: pointint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pointint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pointint;", 
+            "description": "Named entity: pointint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a15"
+                ]
+            ]
+        }, 
+        {
+            "input": "&popf", 
+            "description": "Bad named entity: popf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&popf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&popf;", 
+            "description": "Named entity: popf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd61"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pound", 
+            "description": "Named entity: pound without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pound;", 
+            "description": "Named entity: pound; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pr", 
+            "description": "Bad named entity: pr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pr;", 
+            "description": "Named entity: pr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prE", 
+            "description": "Bad named entity: prE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prE;", 
+            "description": "Named entity: prE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prap", 
+            "description": "Bad named entity: prap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prap;", 
+            "description": "Named entity: prap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prcue", 
+            "description": "Bad named entity: prcue without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prcue"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prcue;", 
+            "description": "Named entity: prcue; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pre", 
+            "description": "Bad named entity: pre without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pre"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pre;", 
+            "description": "Named entity: pre; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aaf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prec", 
+            "description": "Bad named entity: prec without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prec;", 
+            "description": "Named entity: prec; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&precapprox", 
+            "description": "Bad named entity: precapprox without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&precapprox"
+                ]
+            ]
+        }, 
+        {
+            "input": "&precapprox;", 
+            "description": "Named entity: precapprox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&preccurlyeq", 
+            "description": "Bad named entity: preccurlyeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&preccurlyeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&preccurlyeq;", 
+            "description": "Named entity: preccurlyeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&preceq", 
+            "description": "Bad named entity: preceq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&preceq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&preceq;", 
+            "description": "Named entity: preceq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aaf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&precnapprox", 
+            "description": "Bad named entity: precnapprox without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&precnapprox"
+                ]
+            ]
+        }, 
+        {
+            "input": "&precnapprox;", 
+            "description": "Named entity: precnapprox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&precneqq", 
+            "description": "Bad named entity: precneqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&precneqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&precneqq;", 
+            "description": "Named entity: precneqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&precnsim", 
+            "description": "Bad named entity: precnsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&precnsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&precnsim;", 
+            "description": "Named entity: precnsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&precsim", 
+            "description": "Bad named entity: precsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&precsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&precsim;", 
+            "description": "Named entity: precsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prime", 
+            "description": "Bad named entity: prime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prime;", 
+            "description": "Named entity: prime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2032"
+                ]
+            ]
+        }, 
+        {
+            "input": "&primes", 
+            "description": "Bad named entity: primes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&primes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&primes;", 
+            "description": "Named entity: primes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2119"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prnE", 
+            "description": "Bad named entity: prnE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prnE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prnE;", 
+            "description": "Named entity: prnE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prnap", 
+            "description": "Bad named entity: prnap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prnap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prnap;", 
+            "description": "Named entity: prnap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prnsim", 
+            "description": "Bad named entity: prnsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prnsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prnsim;", 
+            "description": "Named entity: prnsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prod", 
+            "description": "Bad named entity: prod without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prod"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prod;", 
+            "description": "Named entity: prod; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u220f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&profalar", 
+            "description": "Bad named entity: profalar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&profalar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&profalar;", 
+            "description": "Named entity: profalar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u232e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&profline", 
+            "description": "Bad named entity: profline without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&profline"
+                ]
+            ]
+        }, 
+        {
+            "input": "&profline;", 
+            "description": "Named entity: profline; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2312"
+                ]
+            ]
+        }, 
+        {
+            "input": "&profsurf", 
+            "description": "Bad named entity: profsurf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&profsurf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&profsurf;", 
+            "description": "Named entity: profsurf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2313"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prop", 
+            "description": "Bad named entity: prop without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prop"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prop;", 
+            "description": "Named entity: prop; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u221d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&propto", 
+            "description": "Bad named entity: propto without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&propto"
+                ]
+            ]
+        }, 
+        {
+            "input": "&propto;", 
+            "description": "Named entity: propto; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u221d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prsim", 
+            "description": "Bad named entity: prsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prsim;", 
+            "description": "Named entity: prsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prurel", 
+            "description": "Bad named entity: prurel without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&prurel"
+                ]
+            ]
+        }, 
+        {
+            "input": "&prurel;", 
+            "description": "Named entity: prurel; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pscr", 
+            "description": "Bad named entity: pscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&pscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&pscr;", 
+            "description": "Named entity: pscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcc5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&psi", 
+            "description": "Bad named entity: psi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&psi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&psi;", 
+            "description": "Named entity: psi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&puncsp", 
+            "description": "Bad named entity: puncsp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&puncsp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&puncsp;", 
+            "description": "Named entity: puncsp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2008"
+                ]
+            ]
+        }, 
+        {
+            "input": "&qfr", 
+            "description": "Bad named entity: qfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&qfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&qfr;", 
+            "description": "Named entity: qfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd2e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&qint", 
+            "description": "Bad named entity: qint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&qint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&qint;", 
+            "description": "Named entity: qint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a0c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&qopf", 
+            "description": "Bad named entity: qopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&qopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&qopf;", 
+            "description": "Named entity: qopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd62"
+                ]
+            ]
+        }, 
+        {
+            "input": "&qprime", 
+            "description": "Bad named entity: qprime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&qprime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&qprime;", 
+            "description": "Named entity: qprime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2057"
+                ]
+            ]
+        }, 
+        {
+            "input": "&qscr", 
+            "description": "Bad named entity: qscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&qscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&qscr;", 
+            "description": "Named entity: qscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcc6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&quaternions", 
+            "description": "Bad named entity: quaternions without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&quaternions"
+                ]
+            ]
+        }, 
+        {
+            "input": "&quaternions;", 
+            "description": "Named entity: quaternions; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u210d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&quatint", 
+            "description": "Bad named entity: quatint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&quatint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&quatint;", 
+            "description": "Named entity: quatint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a16"
+                ]
+            ]
+        }, 
+        {
+            "input": "&quest", 
+            "description": "Bad named entity: quest without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&quest"
+                ]
+            ]
+        }, 
+        {
+            "input": "&quest;", 
+            "description": "Named entity: quest; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "?"
+                ]
+            ]
+        }, 
+        {
+            "input": "&questeq", 
+            "description": "Bad named entity: questeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&questeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&questeq;", 
+            "description": "Named entity: questeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u225f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&quot", 
+            "description": "Named entity: quot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\""
+                ]
+            ]
+        }, 
+        {
+            "input": "&quot;", 
+            "description": "Named entity: quot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\""
+                ]
+            ]
+        }, 
+        {
+            "input": "&rAarr", 
+            "description": "Bad named entity: rAarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rAarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rAarr;", 
+            "description": "Named entity: rAarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21db"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rArr", 
+            "description": "Bad named entity: rArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rArr;", 
+            "description": "Named entity: rArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rAtail", 
+            "description": "Bad named entity: rAtail without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rAtail"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rAtail;", 
+            "description": "Named entity: rAtail; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u291c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rBarr", 
+            "description": "Bad named entity: rBarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rBarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rBarr;", 
+            "description": "Named entity: rBarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u290f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rHar", 
+            "description": "Bad named entity: rHar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rHar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rHar;", 
+            "description": "Named entity: rHar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2964"
+                ]
+            ]
+        }, 
+        {
+            "input": "&race", 
+            "description": "Bad named entity: race without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&race"
+                ]
+            ]
+        }, 
+        {
+            "input": "&race;", 
+            "description": "Named entity: race; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223d\u0331"
+                ]
+            ]
+        }, 
+        {
+            "input": "&racute", 
+            "description": "Bad named entity: racute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&racute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&racute;", 
+            "description": "Named entity: racute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0155"
+                ]
+            ]
+        }, 
+        {
+            "input": "&radic", 
+            "description": "Bad named entity: radic without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&radic"
+                ]
+            ]
+        }, 
+        {
+            "input": "&radic;", 
+            "description": "Named entity: radic; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u221a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&raemptyv", 
+            "description": "Bad named entity: raemptyv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&raemptyv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&raemptyv;", 
+            "description": "Named entity: raemptyv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29b3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rang", 
+            "description": "Bad named entity: rang without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rang"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rang;", 
+            "description": "Named entity: rang; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27e9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rangd", 
+            "description": "Bad named entity: rangd without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rangd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rangd;", 
+            "description": "Named entity: rangd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2992"
+                ]
+            ]
+        }, 
+        {
+            "input": "&range", 
+            "description": "Bad named entity: range without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&range"
+                ]
+            ]
+        }, 
+        {
+            "input": "&range;", 
+            "description": "Named entity: range; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29a5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rangle", 
+            "description": "Bad named entity: rangle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rangle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rangle;", 
+            "description": "Named entity: rangle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27e9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&raquo", 
+            "description": "Named entity: raquo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00bb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&raquo;", 
+            "description": "Named entity: raquo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00bb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarr", 
+            "description": "Bad named entity: rarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarr;", 
+            "description": "Named entity: rarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2192"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrap", 
+            "description": "Bad named entity: rarrap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrap;", 
+            "description": "Named entity: rarrap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2975"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrb", 
+            "description": "Bad named entity: rarrb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrb;", 
+            "description": "Named entity: rarrb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21e5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrbfs", 
+            "description": "Bad named entity: rarrbfs without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrbfs"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrbfs;", 
+            "description": "Named entity: rarrbfs; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2920"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrc", 
+            "description": "Bad named entity: rarrc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrc;", 
+            "description": "Named entity: rarrc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2933"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrfs", 
+            "description": "Bad named entity: rarrfs without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrfs"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrfs;", 
+            "description": "Named entity: rarrfs; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u291e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrhk", 
+            "description": "Bad named entity: rarrhk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrhk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrhk;", 
+            "description": "Named entity: rarrhk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21aa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrlp", 
+            "description": "Bad named entity: rarrlp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrlp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrlp;", 
+            "description": "Named entity: rarrlp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21ac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrpl", 
+            "description": "Bad named entity: rarrpl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrpl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrpl;", 
+            "description": "Named entity: rarrpl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2945"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrsim", 
+            "description": "Bad named entity: rarrsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrsim;", 
+            "description": "Named entity: rarrsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2974"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrtl", 
+            "description": "Bad named entity: rarrtl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrtl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrtl;", 
+            "description": "Named entity: rarrtl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrw", 
+            "description": "Bad named entity: rarrw without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rarrw"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rarrw;", 
+            "description": "Named entity: rarrw; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u219d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ratail", 
+            "description": "Bad named entity: ratail without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ratail"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ratail;", 
+            "description": "Named entity: ratail; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u291a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ratio", 
+            "description": "Bad named entity: ratio without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ratio"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ratio;", 
+            "description": "Named entity: ratio; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2236"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rationals", 
+            "description": "Bad named entity: rationals without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rationals"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rationals;", 
+            "description": "Named entity: rationals; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbarr", 
+            "description": "Bad named entity: rbarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rbarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbarr;", 
+            "description": "Named entity: rbarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u290d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbbrk", 
+            "description": "Bad named entity: rbbrk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rbbrk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbbrk;", 
+            "description": "Named entity: rbbrk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2773"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbrace", 
+            "description": "Bad named entity: rbrace without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rbrace"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbrace;", 
+            "description": "Named entity: rbrace; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "}"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbrack", 
+            "description": "Bad named entity: rbrack without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rbrack"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbrack;", 
+            "description": "Named entity: rbrack; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "]"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbrke", 
+            "description": "Bad named entity: rbrke without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rbrke"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbrke;", 
+            "description": "Named entity: rbrke; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u298c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbrksld", 
+            "description": "Bad named entity: rbrksld without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rbrksld"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbrksld;", 
+            "description": "Named entity: rbrksld; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u298e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbrkslu", 
+            "description": "Bad named entity: rbrkslu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rbrkslu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rbrkslu;", 
+            "description": "Named entity: rbrkslu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2990"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rcaron", 
+            "description": "Bad named entity: rcaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rcaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rcaron;", 
+            "description": "Named entity: rcaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0159"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rcedil", 
+            "description": "Bad named entity: rcedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rcedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rcedil;", 
+            "description": "Named entity: rcedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0157"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rceil", 
+            "description": "Bad named entity: rceil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rceil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rceil;", 
+            "description": "Named entity: rceil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2309"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rcub", 
+            "description": "Bad named entity: rcub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rcub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rcub;", 
+            "description": "Named entity: rcub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "}"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rcy", 
+            "description": "Bad named entity: rcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rcy;", 
+            "description": "Named entity: rcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0440"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rdca", 
+            "description": "Bad named entity: rdca without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rdca"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rdca;", 
+            "description": "Named entity: rdca; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2937"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rdldhar", 
+            "description": "Bad named entity: rdldhar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rdldhar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rdldhar;", 
+            "description": "Named entity: rdldhar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2969"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rdquo", 
+            "description": "Bad named entity: rdquo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rdquo"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rdquo;", 
+            "description": "Named entity: rdquo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u201d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rdquor", 
+            "description": "Bad named entity: rdquor without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rdquor"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rdquor;", 
+            "description": "Named entity: rdquor; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u201d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rdsh", 
+            "description": "Bad named entity: rdsh without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rdsh"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rdsh;", 
+            "description": "Named entity: rdsh; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&real", 
+            "description": "Bad named entity: real without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&real"
+                ]
+            ]
+        }, 
+        {
+            "input": "&real;", 
+            "description": "Named entity: real; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&realine", 
+            "description": "Bad named entity: realine without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&realine"
+                ]
+            ]
+        }, 
+        {
+            "input": "&realine;", 
+            "description": "Named entity: realine; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&realpart", 
+            "description": "Bad named entity: realpart without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&realpart"
+                ]
+            ]
+        }, 
+        {
+            "input": "&realpart;", 
+            "description": "Named entity: realpart; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&reals", 
+            "description": "Bad named entity: reals without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&reals"
+                ]
+            ]
+        }, 
+        {
+            "input": "&reals;", 
+            "description": "Named entity: reals; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rect", 
+            "description": "Bad named entity: rect without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rect"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rect;", 
+            "description": "Named entity: rect; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25ad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&reg", 
+            "description": "Named entity: reg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&reg;", 
+            "description": "Named entity: reg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ae"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rfisht", 
+            "description": "Bad named entity: rfisht without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rfisht"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rfisht;", 
+            "description": "Named entity: rfisht; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u297d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rfloor", 
+            "description": "Bad named entity: rfloor without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rfloor"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rfloor;", 
+            "description": "Named entity: rfloor; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u230b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rfr", 
+            "description": "Bad named entity: rfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rfr;", 
+            "description": "Named entity: rfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd2f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rhard", 
+            "description": "Bad named entity: rhard without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rhard"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rhard;", 
+            "description": "Named entity: rhard; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rharu", 
+            "description": "Bad named entity: rharu without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rharu"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rharu;", 
+            "description": "Named entity: rharu; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rharul", 
+            "description": "Bad named entity: rharul without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rharul"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rharul;", 
+            "description": "Named entity: rharul; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u296c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rho", 
+            "description": "Bad named entity: rho without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rho"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rho;", 
+            "description": "Named entity: rho; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rhov", 
+            "description": "Bad named entity: rhov without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rhov"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rhov;", 
+            "description": "Named entity: rhov; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03f1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightarrow", 
+            "description": "Bad named entity: rightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightarrow;", 
+            "description": "Named entity: rightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2192"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightarrowtail", 
+            "description": "Bad named entity: rightarrowtail without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rightarrowtail"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightarrowtail;", 
+            "description": "Named entity: rightarrowtail; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightharpoondown", 
+            "description": "Bad named entity: rightharpoondown without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rightharpoondown"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightharpoondown;", 
+            "description": "Named entity: rightharpoondown; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightharpoonup", 
+            "description": "Bad named entity: rightharpoonup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rightharpoonup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightharpoonup;", 
+            "description": "Named entity: rightharpoonup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightleftarrows", 
+            "description": "Bad named entity: rightleftarrows without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rightleftarrows"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightleftarrows;", 
+            "description": "Named entity: rightleftarrows; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightleftharpoons", 
+            "description": "Bad named entity: rightleftharpoons without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rightleftharpoons"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightleftharpoons;", 
+            "description": "Named entity: rightleftharpoons; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21cc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightrightarrows", 
+            "description": "Bad named entity: rightrightarrows without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rightrightarrows"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightrightarrows;", 
+            "description": "Named entity: rightrightarrows; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightsquigarrow", 
+            "description": "Bad named entity: rightsquigarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rightsquigarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightsquigarrow;", 
+            "description": "Named entity: rightsquigarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u219d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightthreetimes", 
+            "description": "Bad named entity: rightthreetimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rightthreetimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rightthreetimes;", 
+            "description": "Named entity: rightthreetimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22cc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ring", 
+            "description": "Bad named entity: ring without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ring"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ring;", 
+            "description": "Named entity: ring; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02da"
+                ]
+            ]
+        }, 
+        {
+            "input": "&risingdotseq", 
+            "description": "Bad named entity: risingdotseq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&risingdotseq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&risingdotseq;", 
+            "description": "Named entity: risingdotseq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2253"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rlarr", 
+            "description": "Bad named entity: rlarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rlarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rlarr;", 
+            "description": "Named entity: rlarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rlhar", 
+            "description": "Bad named entity: rlhar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rlhar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rlhar;", 
+            "description": "Named entity: rlhar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21cc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rlm", 
+            "description": "Bad named entity: rlm without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rlm"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rlm;", 
+            "description": "Named entity: rlm; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rmoust", 
+            "description": "Bad named entity: rmoust without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rmoust"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rmoust;", 
+            "description": "Named entity: rmoust; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rmoustache", 
+            "description": "Bad named entity: rmoustache without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rmoustache"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rmoustache;", 
+            "description": "Named entity: rmoustache; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rnmid", 
+            "description": "Bad named entity: rnmid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rnmid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rnmid;", 
+            "description": "Named entity: rnmid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&roang", 
+            "description": "Bad named entity: roang without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&roang"
+                ]
+            ]
+        }, 
+        {
+            "input": "&roang;", 
+            "description": "Named entity: roang; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27ed"
+                ]
+            ]
+        }, 
+        {
+            "input": "&roarr", 
+            "description": "Bad named entity: roarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&roarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&roarr;", 
+            "description": "Named entity: roarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21fe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&robrk", 
+            "description": "Bad named entity: robrk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&robrk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&robrk;", 
+            "description": "Named entity: robrk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27e7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ropar", 
+            "description": "Bad named entity: ropar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ropar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ropar;", 
+            "description": "Named entity: ropar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2986"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ropf", 
+            "description": "Bad named entity: ropf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ropf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ropf;", 
+            "description": "Named entity: ropf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd63"
+                ]
+            ]
+        }, 
+        {
+            "input": "&roplus", 
+            "description": "Bad named entity: roplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&roplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&roplus;", 
+            "description": "Named entity: roplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a2e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rotimes", 
+            "description": "Bad named entity: rotimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rotimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rotimes;", 
+            "description": "Named entity: rotimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a35"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rpar", 
+            "description": "Bad named entity: rpar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rpar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rpar;", 
+            "description": "Named entity: rpar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    ")"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rpargt", 
+            "description": "Bad named entity: rpargt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rpargt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rpargt;", 
+            "description": "Named entity: rpargt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2994"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rppolint", 
+            "description": "Bad named entity: rppolint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rppolint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rppolint;", 
+            "description": "Named entity: rppolint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a12"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rrarr", 
+            "description": "Bad named entity: rrarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rrarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rrarr;", 
+            "description": "Named entity: rrarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rsaquo", 
+            "description": "Bad named entity: rsaquo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rsaquo"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rsaquo;", 
+            "description": "Named entity: rsaquo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u203a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rscr", 
+            "description": "Bad named entity: rscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rscr;", 
+            "description": "Named entity: rscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcc7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rsh", 
+            "description": "Bad named entity: rsh without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rsh"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rsh;", 
+            "description": "Named entity: rsh; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21b1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rsqb", 
+            "description": "Bad named entity: rsqb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rsqb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rsqb;", 
+            "description": "Named entity: rsqb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "]"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rsquo", 
+            "description": "Bad named entity: rsquo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rsquo"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rsquo;", 
+            "description": "Named entity: rsquo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2019"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rsquor", 
+            "description": "Bad named entity: rsquor without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rsquor"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rsquor;", 
+            "description": "Named entity: rsquor; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2019"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rthree", 
+            "description": "Bad named entity: rthree without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rthree"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rthree;", 
+            "description": "Named entity: rthree; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22cc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rtimes", 
+            "description": "Bad named entity: rtimes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rtimes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rtimes;", 
+            "description": "Named entity: rtimes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ca"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rtri", 
+            "description": "Bad named entity: rtri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rtri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rtri;", 
+            "description": "Named entity: rtri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rtrie", 
+            "description": "Bad named entity: rtrie without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rtrie"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rtrie;", 
+            "description": "Named entity: rtrie; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rtrif", 
+            "description": "Bad named entity: rtrif without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rtrif"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rtrif;", 
+            "description": "Named entity: rtrif; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rtriltri", 
+            "description": "Bad named entity: rtriltri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rtriltri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rtriltri;", 
+            "description": "Named entity: rtriltri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29ce"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ruluhar", 
+            "description": "Bad named entity: ruluhar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ruluhar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ruluhar;", 
+            "description": "Named entity: ruluhar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2968"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rx", 
+            "description": "Bad named entity: rx without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&rx"
+                ]
+            ]
+        }, 
+        {
+            "input": "&rx;", 
+            "description": "Named entity: rx; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u211e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sacute", 
+            "description": "Bad named entity: sacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sacute;", 
+            "description": "Named entity: sacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u015b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sbquo", 
+            "description": "Bad named entity: sbquo without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sbquo"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sbquo;", 
+            "description": "Named entity: sbquo; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u201a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sc", 
+            "description": "Bad named entity: sc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sc;", 
+            "description": "Named entity: sc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scE", 
+            "description": "Bad named entity: scE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scE;", 
+            "description": "Named entity: scE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scap", 
+            "description": "Bad named entity: scap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scap;", 
+            "description": "Named entity: scap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scaron", 
+            "description": "Bad named entity: scaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scaron;", 
+            "description": "Named entity: scaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0161"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sccue", 
+            "description": "Bad named entity: sccue without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sccue"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sccue;", 
+            "description": "Named entity: sccue; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sce", 
+            "description": "Bad named entity: sce without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sce"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sce;", 
+            "description": "Named entity: sce; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scedil", 
+            "description": "Bad named entity: scedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scedil;", 
+            "description": "Named entity: scedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u015f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scirc", 
+            "description": "Bad named entity: scirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scirc;", 
+            "description": "Named entity: scirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u015d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scnE", 
+            "description": "Bad named entity: scnE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scnE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scnE;", 
+            "description": "Named entity: scnE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scnap", 
+            "description": "Bad named entity: scnap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scnap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scnap;", 
+            "description": "Named entity: scnap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aba"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scnsim", 
+            "description": "Bad named entity: scnsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scnsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scnsim;", 
+            "description": "Named entity: scnsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scpolint", 
+            "description": "Bad named entity: scpolint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scpolint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scpolint;", 
+            "description": "Named entity: scpolint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a13"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scsim", 
+            "description": "Bad named entity: scsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scsim;", 
+            "description": "Named entity: scsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scy", 
+            "description": "Bad named entity: scy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&scy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&scy;", 
+            "description": "Named entity: scy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0441"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sdot", 
+            "description": "Bad named entity: sdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sdot;", 
+            "description": "Named entity: sdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sdotb", 
+            "description": "Bad named entity: sdotb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sdotb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sdotb;", 
+            "description": "Named entity: sdotb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sdote", 
+            "description": "Bad named entity: sdote without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sdote"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sdote;", 
+            "description": "Named entity: sdote; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a66"
+                ]
+            ]
+        }, 
+        {
+            "input": "&seArr", 
+            "description": "Bad named entity: seArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&seArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&seArr;", 
+            "description": "Named entity: seArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&searhk", 
+            "description": "Bad named entity: searhk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&searhk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&searhk;", 
+            "description": "Named entity: searhk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2925"
+                ]
+            ]
+        }, 
+        {
+            "input": "&searr", 
+            "description": "Bad named entity: searr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&searr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&searr;", 
+            "description": "Named entity: searr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2198"
+                ]
+            ]
+        }, 
+        {
+            "input": "&searrow", 
+            "description": "Bad named entity: searrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&searrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&searrow;", 
+            "description": "Named entity: searrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2198"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sect", 
+            "description": "Named entity: sect without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sect;", 
+            "description": "Named entity: sect; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&semi", 
+            "description": "Bad named entity: semi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&semi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&semi;", 
+            "description": "Named entity: semi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    ";"
+                ]
+            ]
+        }, 
+        {
+            "input": "&seswar", 
+            "description": "Bad named entity: seswar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&seswar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&seswar;", 
+            "description": "Named entity: seswar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2929"
+                ]
+            ]
+        }, 
+        {
+            "input": "&setminus", 
+            "description": "Bad named entity: setminus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&setminus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&setminus;", 
+            "description": "Named entity: setminus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2216"
+                ]
+            ]
+        }, 
+        {
+            "input": "&setmn", 
+            "description": "Bad named entity: setmn without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&setmn"
+                ]
+            ]
+        }, 
+        {
+            "input": "&setmn;", 
+            "description": "Named entity: setmn; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2216"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sext", 
+            "description": "Bad named entity: sext without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sext"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sext;", 
+            "description": "Named entity: sext; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2736"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sfr", 
+            "description": "Bad named entity: sfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sfr;", 
+            "description": "Named entity: sfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd30"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sfrown", 
+            "description": "Bad named entity: sfrown without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sfrown"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sfrown;", 
+            "description": "Named entity: sfrown; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2322"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sharp", 
+            "description": "Bad named entity: sharp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sharp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sharp;", 
+            "description": "Named entity: sharp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u266f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&shchcy", 
+            "description": "Bad named entity: shchcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&shchcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&shchcy;", 
+            "description": "Named entity: shchcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0449"
+                ]
+            ]
+        }, 
+        {
+            "input": "&shcy", 
+            "description": "Bad named entity: shcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&shcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&shcy;", 
+            "description": "Named entity: shcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0448"
+                ]
+            ]
+        }, 
+        {
+            "input": "&shortmid", 
+            "description": "Bad named entity: shortmid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&shortmid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&shortmid;", 
+            "description": "Named entity: shortmid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2223"
+                ]
+            ]
+        }, 
+        {
+            "input": "&shortparallel", 
+            "description": "Bad named entity: shortparallel without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&shortparallel"
+                ]
+            ]
+        }, 
+        {
+            "input": "&shortparallel;", 
+            "description": "Named entity: shortparallel; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2225"
+                ]
+            ]
+        }, 
+        {
+            "input": "&shy", 
+            "description": "Named entity: shy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&shy;", 
+            "description": "Named entity: shy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ad"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sigma", 
+            "description": "Bad named entity: sigma without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sigma"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sigma;", 
+            "description": "Named entity: sigma; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sigmaf", 
+            "description": "Bad named entity: sigmaf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sigmaf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sigmaf;", 
+            "description": "Named entity: sigmaf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sigmav", 
+            "description": "Bad named entity: sigmav without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sigmav"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sigmav;", 
+            "description": "Named entity: sigmav; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sim", 
+            "description": "Bad named entity: sim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sim;", 
+            "description": "Named entity: sim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simdot", 
+            "description": "Bad named entity: simdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&simdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simdot;", 
+            "description": "Named entity: simdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a6a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sime", 
+            "description": "Bad named entity: sime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sime;", 
+            "description": "Named entity: sime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2243"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simeq", 
+            "description": "Bad named entity: simeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&simeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simeq;", 
+            "description": "Named entity: simeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2243"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simg", 
+            "description": "Bad named entity: simg without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&simg"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simg;", 
+            "description": "Named entity: simg; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a9e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simgE", 
+            "description": "Bad named entity: simgE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&simgE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simgE;", 
+            "description": "Named entity: simgE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aa0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&siml", 
+            "description": "Bad named entity: siml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&siml"
+                ]
+            ]
+        }, 
+        {
+            "input": "&siml;", 
+            "description": "Named entity: siml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a9d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simlE", 
+            "description": "Bad named entity: simlE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&simlE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simlE;", 
+            "description": "Named entity: simlE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a9f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simne", 
+            "description": "Bad named entity: simne without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&simne"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simne;", 
+            "description": "Named entity: simne; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2246"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simplus", 
+            "description": "Bad named entity: simplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&simplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simplus;", 
+            "description": "Named entity: simplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a24"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simrarr", 
+            "description": "Bad named entity: simrarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&simrarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&simrarr;", 
+            "description": "Named entity: simrarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2972"
+                ]
+            ]
+        }, 
+        {
+            "input": "&slarr", 
+            "description": "Bad named entity: slarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&slarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&slarr;", 
+            "description": "Named entity: slarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2190"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smallsetminus", 
+            "description": "Bad named entity: smallsetminus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&smallsetminus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smallsetminus;", 
+            "description": "Named entity: smallsetminus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2216"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smashp", 
+            "description": "Bad named entity: smashp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&smashp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smashp;", 
+            "description": "Named entity: smashp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a33"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smeparsl", 
+            "description": "Bad named entity: smeparsl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&smeparsl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smeparsl;", 
+            "description": "Named entity: smeparsl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29e4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smid", 
+            "description": "Bad named entity: smid without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&smid"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smid;", 
+            "description": "Named entity: smid; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2223"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smile", 
+            "description": "Bad named entity: smile without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&smile"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smile;", 
+            "description": "Named entity: smile; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2323"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smt", 
+            "description": "Bad named entity: smt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&smt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smt;", 
+            "description": "Named entity: smt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aaa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smte", 
+            "description": "Bad named entity: smte without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&smte"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smte;", 
+            "description": "Named entity: smte; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smtes", 
+            "description": "Bad named entity: smtes without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&smtes"
+                ]
+            ]
+        }, 
+        {
+            "input": "&smtes;", 
+            "description": "Named entity: smtes; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aac\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&softcy", 
+            "description": "Bad named entity: softcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&softcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&softcy;", 
+            "description": "Named entity: softcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u044c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sol", 
+            "description": "Bad named entity: sol without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sol"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sol;", 
+            "description": "Named entity: sol; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "/"
+                ]
+            ]
+        }, 
+        {
+            "input": "&solb", 
+            "description": "Bad named entity: solb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&solb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&solb;", 
+            "description": "Named entity: solb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29c4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&solbar", 
+            "description": "Bad named entity: solbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&solbar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&solbar;", 
+            "description": "Named entity: solbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u233f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sopf", 
+            "description": "Bad named entity: sopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sopf;", 
+            "description": "Named entity: sopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd64"
+                ]
+            ]
+        }, 
+        {
+            "input": "&spades", 
+            "description": "Bad named entity: spades without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&spades"
+                ]
+            ]
+        }, 
+        {
+            "input": "&spades;", 
+            "description": "Named entity: spades; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2660"
+                ]
+            ]
+        }, 
+        {
+            "input": "&spadesuit", 
+            "description": "Bad named entity: spadesuit without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&spadesuit"
+                ]
+            ]
+        }, 
+        {
+            "input": "&spadesuit;", 
+            "description": "Named entity: spadesuit; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2660"
+                ]
+            ]
+        }, 
+        {
+            "input": "&spar", 
+            "description": "Bad named entity: spar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&spar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&spar;", 
+            "description": "Named entity: spar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2225"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqcap", 
+            "description": "Bad named entity: sqcap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqcap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqcap;", 
+            "description": "Named entity: sqcap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2293"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqcaps", 
+            "description": "Bad named entity: sqcaps without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqcaps"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqcaps;", 
+            "description": "Named entity: sqcaps; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2293\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqcup", 
+            "description": "Bad named entity: sqcup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqcup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqcup;", 
+            "description": "Named entity: sqcup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2294"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqcups", 
+            "description": "Bad named entity: sqcups without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqcups"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqcups;", 
+            "description": "Named entity: sqcups; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2294\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsub", 
+            "description": "Bad named entity: sqsub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqsub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsub;", 
+            "description": "Named entity: sqsub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsube", 
+            "description": "Bad named entity: sqsube without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqsube"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsube;", 
+            "description": "Named entity: sqsube; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2291"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsubset", 
+            "description": "Bad named entity: sqsubset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqsubset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsubset;", 
+            "description": "Named entity: sqsubset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsubseteq", 
+            "description": "Bad named entity: sqsubseteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqsubseteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsubseteq;", 
+            "description": "Named entity: sqsubseteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2291"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsup", 
+            "description": "Bad named entity: sqsup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqsup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsup;", 
+            "description": "Named entity: sqsup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2290"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsupe", 
+            "description": "Bad named entity: sqsupe without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqsupe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsupe;", 
+            "description": "Named entity: sqsupe; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2292"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsupset", 
+            "description": "Bad named entity: sqsupset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqsupset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsupset;", 
+            "description": "Named entity: sqsupset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2290"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsupseteq", 
+            "description": "Bad named entity: sqsupseteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sqsupseteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sqsupseteq;", 
+            "description": "Named entity: sqsupseteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2292"
+                ]
+            ]
+        }, 
+        {
+            "input": "&squ", 
+            "description": "Bad named entity: squ without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&squ"
+                ]
+            ]
+        }, 
+        {
+            "input": "&squ;", 
+            "description": "Named entity: squ; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25a1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&square", 
+            "description": "Bad named entity: square without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&square"
+                ]
+            ]
+        }, 
+        {
+            "input": "&square;", 
+            "description": "Named entity: square; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25a1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&squarf", 
+            "description": "Bad named entity: squarf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&squarf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&squarf;", 
+            "description": "Named entity: squarf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25aa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&squf", 
+            "description": "Bad named entity: squf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&squf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&squf;", 
+            "description": "Named entity: squf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25aa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&srarr", 
+            "description": "Bad named entity: srarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&srarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&srarr;", 
+            "description": "Named entity: srarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2192"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sscr", 
+            "description": "Bad named entity: sscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sscr;", 
+            "description": "Named entity: sscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcc8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ssetmn", 
+            "description": "Bad named entity: ssetmn without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ssetmn"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ssetmn;", 
+            "description": "Named entity: ssetmn; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2216"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ssmile", 
+            "description": "Bad named entity: ssmile without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ssmile"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ssmile;", 
+            "description": "Named entity: ssmile; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2323"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sstarf", 
+            "description": "Bad named entity: sstarf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sstarf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sstarf;", 
+            "description": "Named entity: sstarf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&star", 
+            "description": "Bad named entity: star without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&star"
+                ]
+            ]
+        }, 
+        {
+            "input": "&star;", 
+            "description": "Named entity: star; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2606"
+                ]
+            ]
+        }, 
+        {
+            "input": "&starf", 
+            "description": "Bad named entity: starf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&starf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&starf;", 
+            "description": "Named entity: starf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2605"
+                ]
+            ]
+        }, 
+        {
+            "input": "&straightepsilon", 
+            "description": "Bad named entity: straightepsilon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&straightepsilon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&straightepsilon;", 
+            "description": "Named entity: straightepsilon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&straightphi", 
+            "description": "Bad named entity: straightphi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&straightphi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&straightphi;", 
+            "description": "Named entity: straightphi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03d5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&strns", 
+            "description": "Bad named entity: strns without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&strns"
+                ]
+            ]
+        }, 
+        {
+            "input": "&strns;", 
+            "description": "Named entity: strns; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00af"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sub", 
+            "description": "Bad named entity: sub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sub;", 
+            "description": "Named entity: sub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2282"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subE", 
+            "description": "Bad named entity: subE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subE;", 
+            "description": "Named entity: subE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subdot", 
+            "description": "Bad named entity: subdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subdot;", 
+            "description": "Named entity: subdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2abd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sube", 
+            "description": "Bad named entity: sube without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sube"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sube;", 
+            "description": "Named entity: sube; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2286"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subedot", 
+            "description": "Bad named entity: subedot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subedot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subedot;", 
+            "description": "Named entity: subedot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&submult", 
+            "description": "Bad named entity: submult without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&submult"
+                ]
+            ]
+        }, 
+        {
+            "input": "&submult;", 
+            "description": "Named entity: submult; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subnE", 
+            "description": "Bad named entity: subnE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subnE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subnE;", 
+            "description": "Named entity: subnE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2acb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subne", 
+            "description": "Bad named entity: subne without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subne"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subne;", 
+            "description": "Named entity: subne; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subplus", 
+            "description": "Bad named entity: subplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subplus;", 
+            "description": "Named entity: subplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2abf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subrarr", 
+            "description": "Bad named entity: subrarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subrarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subrarr;", 
+            "description": "Named entity: subrarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2979"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subset", 
+            "description": "Bad named entity: subset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subset;", 
+            "description": "Named entity: subset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2282"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subseteq", 
+            "description": "Bad named entity: subseteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subseteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subseteq;", 
+            "description": "Named entity: subseteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2286"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subseteqq", 
+            "description": "Bad named entity: subseteqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subseteqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subseteqq;", 
+            "description": "Named entity: subseteqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subsetneq", 
+            "description": "Bad named entity: subsetneq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subsetneq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subsetneq;", 
+            "description": "Named entity: subsetneq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subsetneqq", 
+            "description": "Bad named entity: subsetneqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subsetneqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subsetneqq;", 
+            "description": "Named entity: subsetneqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2acb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subsim", 
+            "description": "Bad named entity: subsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subsim;", 
+            "description": "Named entity: subsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subsub", 
+            "description": "Bad named entity: subsub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subsub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subsub;", 
+            "description": "Named entity: subsub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ad5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subsup", 
+            "description": "Bad named entity: subsup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&subsup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&subsup;", 
+            "description": "Named entity: subsup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ad3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succ", 
+            "description": "Bad named entity: succ without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&succ"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succ;", 
+            "description": "Named entity: succ; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succapprox", 
+            "description": "Bad named entity: succapprox without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&succapprox"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succapprox;", 
+            "description": "Named entity: succapprox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succcurlyeq", 
+            "description": "Bad named entity: succcurlyeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&succcurlyeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succcurlyeq;", 
+            "description": "Named entity: succcurlyeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succeq", 
+            "description": "Bad named entity: succeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&succeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succeq;", 
+            "description": "Named entity: succeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succnapprox", 
+            "description": "Bad named entity: succnapprox without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&succnapprox"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succnapprox;", 
+            "description": "Named entity: succnapprox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2aba"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succneqq", 
+            "description": "Bad named entity: succneqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&succneqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succneqq;", 
+            "description": "Named entity: succneqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ab6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succnsim", 
+            "description": "Bad named entity: succnsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&succnsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succnsim;", 
+            "description": "Named entity: succnsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22e9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succsim", 
+            "description": "Bad named entity: succsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&succsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&succsim;", 
+            "description": "Named entity: succsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u227f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sum", 
+            "description": "Bad named entity: sum without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sum"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sum;", 
+            "description": "Named entity: sum; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2211"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sung", 
+            "description": "Bad named entity: sung without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sung"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sung;", 
+            "description": "Named entity: sung; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u266a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sup", 
+            "description": "Bad named entity: sup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&sup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sup1", 
+            "description": "Named entity: sup1 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00b9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sup1;", 
+            "description": "Named entity: sup1; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sup2", 
+            "description": "Named entity: sup2 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00b2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sup2;", 
+            "description": "Named entity: sup2; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sup3", 
+            "description": "Named entity: sup3 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00b3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sup3;", 
+            "description": "Named entity: sup3; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00b3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&sup;", 
+            "description": "Named entity: sup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2283"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supE", 
+            "description": "Bad named entity: supE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supE;", 
+            "description": "Named entity: supE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supdot", 
+            "description": "Bad named entity: supdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supdot;", 
+            "description": "Named entity: supdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2abe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supdsub", 
+            "description": "Bad named entity: supdsub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supdsub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supdsub;", 
+            "description": "Named entity: supdsub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ad8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supe", 
+            "description": "Bad named entity: supe without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supe;", 
+            "description": "Named entity: supe; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2287"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supedot", 
+            "description": "Bad named entity: supedot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supedot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supedot;", 
+            "description": "Named entity: supedot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&suphsol", 
+            "description": "Bad named entity: suphsol without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&suphsol"
+                ]
+            ]
+        }, 
+        {
+            "input": "&suphsol;", 
+            "description": "Named entity: suphsol; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27c9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&suphsub", 
+            "description": "Bad named entity: suphsub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&suphsub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&suphsub;", 
+            "description": "Named entity: suphsub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ad7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&suplarr", 
+            "description": "Bad named entity: suplarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&suplarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&suplarr;", 
+            "description": "Named entity: suplarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u297b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supmult", 
+            "description": "Bad named entity: supmult without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supmult"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supmult;", 
+            "description": "Named entity: supmult; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supnE", 
+            "description": "Bad named entity: supnE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supnE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supnE;", 
+            "description": "Named entity: supnE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2acc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supne", 
+            "description": "Bad named entity: supne without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supne"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supne;", 
+            "description": "Named entity: supne; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supplus", 
+            "description": "Bad named entity: supplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supplus;", 
+            "description": "Named entity: supplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supset", 
+            "description": "Bad named entity: supset without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supset"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supset;", 
+            "description": "Named entity: supset; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2283"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supseteq", 
+            "description": "Bad named entity: supseteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supseteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supseteq;", 
+            "description": "Named entity: supseteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2287"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supseteqq", 
+            "description": "Bad named entity: supseteqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supseteqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supseteqq;", 
+            "description": "Named entity: supseteqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supsetneq", 
+            "description": "Bad named entity: supsetneq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supsetneq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supsetneq;", 
+            "description": "Named entity: supsetneq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supsetneqq", 
+            "description": "Bad named entity: supsetneqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supsetneqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supsetneqq;", 
+            "description": "Named entity: supsetneqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2acc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supsim", 
+            "description": "Bad named entity: supsim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supsim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supsim;", 
+            "description": "Named entity: supsim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ac8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supsub", 
+            "description": "Bad named entity: supsub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supsub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supsub;", 
+            "description": "Named entity: supsub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ad4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supsup", 
+            "description": "Bad named entity: supsup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&supsup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&supsup;", 
+            "description": "Named entity: supsup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ad6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&swArr", 
+            "description": "Bad named entity: swArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&swArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&swArr;", 
+            "description": "Named entity: swArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&swarhk", 
+            "description": "Bad named entity: swarhk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&swarhk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&swarhk;", 
+            "description": "Named entity: swarhk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2926"
+                ]
+            ]
+        }, 
+        {
+            "input": "&swarr", 
+            "description": "Bad named entity: swarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&swarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&swarr;", 
+            "description": "Named entity: swarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2199"
+                ]
+            ]
+        }, 
+        {
+            "input": "&swarrow", 
+            "description": "Bad named entity: swarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&swarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&swarrow;", 
+            "description": "Named entity: swarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2199"
+                ]
+            ]
+        }, 
+        {
+            "input": "&swnwar", 
+            "description": "Bad named entity: swnwar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&swnwar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&swnwar;", 
+            "description": "Named entity: swnwar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u292a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&szlig", 
+            "description": "Named entity: szlig without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00df"
+                ]
+            ]
+        }, 
+        {
+            "input": "&szlig;", 
+            "description": "Named entity: szlig; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00df"
+                ]
+            ]
+        }, 
+        {
+            "input": "&target", 
+            "description": "Bad named entity: target without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&target"
+                ]
+            ]
+        }, 
+        {
+            "input": "&target;", 
+            "description": "Named entity: target; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2316"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tau", 
+            "description": "Bad named entity: tau without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tau"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tau;", 
+            "description": "Named entity: tau; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tbrk", 
+            "description": "Bad named entity: tbrk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tbrk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tbrk;", 
+            "description": "Named entity: tbrk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tcaron", 
+            "description": "Bad named entity: tcaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tcaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tcaron;", 
+            "description": "Named entity: tcaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0165"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tcedil", 
+            "description": "Bad named entity: tcedil without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tcedil"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tcedil;", 
+            "description": "Named entity: tcedil; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0163"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tcy", 
+            "description": "Bad named entity: tcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tcy;", 
+            "description": "Named entity: tcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0442"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tdot", 
+            "description": "Bad named entity: tdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tdot;", 
+            "description": "Named entity: tdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u20db"
+                ]
+            ]
+        }, 
+        {
+            "input": "&telrec", 
+            "description": "Bad named entity: telrec without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&telrec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&telrec;", 
+            "description": "Named entity: telrec; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2315"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tfr", 
+            "description": "Bad named entity: tfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tfr;", 
+            "description": "Named entity: tfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd31"
+                ]
+            ]
+        }, 
+        {
+            "input": "&there4", 
+            "description": "Bad named entity: there4 without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&there4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&there4;", 
+            "description": "Named entity: there4; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2234"
+                ]
+            ]
+        }, 
+        {
+            "input": "&therefore", 
+            "description": "Bad named entity: therefore without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&therefore"
+                ]
+            ]
+        }, 
+        {
+            "input": "&therefore;", 
+            "description": "Named entity: therefore; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2234"
+                ]
+            ]
+        }, 
+        {
+            "input": "&theta", 
+            "description": "Bad named entity: theta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&theta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&theta;", 
+            "description": "Named entity: theta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03b8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thetasym", 
+            "description": "Bad named entity: thetasym without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&thetasym"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thetasym;", 
+            "description": "Named entity: thetasym; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03d1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thetav", 
+            "description": "Bad named entity: thetav without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&thetav"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thetav;", 
+            "description": "Named entity: thetav; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03d1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thickapprox", 
+            "description": "Bad named entity: thickapprox without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&thickapprox"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thickapprox;", 
+            "description": "Named entity: thickapprox; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2248"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thicksim", 
+            "description": "Bad named entity: thicksim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&thicksim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thicksim;", 
+            "description": "Named entity: thicksim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thinsp", 
+            "description": "Bad named entity: thinsp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&thinsp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thinsp;", 
+            "description": "Named entity: thinsp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2009"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thkap", 
+            "description": "Bad named entity: thkap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&thkap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thkap;", 
+            "description": "Named entity: thkap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2248"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thksim", 
+            "description": "Bad named entity: thksim without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&thksim"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thksim;", 
+            "description": "Named entity: thksim; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u223c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thorn", 
+            "description": "Named entity: thorn without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00fe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&thorn;", 
+            "description": "Named entity: thorn; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00fe"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tilde", 
+            "description": "Bad named entity: tilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tilde;", 
+            "description": "Named entity: tilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u02dc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&times", 
+            "description": "Named entity: times without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00d7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&times;", 
+            "description": "Named entity: times; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00d7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&timesb;", 
+            "description": "Named entity: timesb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&timesbar;", 
+            "description": "Named entity: timesbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a31"
+                ]
+            ]
+        }, 
+        {
+            "input": "&timesd;", 
+            "description": "Named entity: timesd; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a30"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tint", 
+            "description": "Bad named entity: tint without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tint"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tint;", 
+            "description": "Named entity: tint; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u222d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&toea", 
+            "description": "Bad named entity: toea without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&toea"
+                ]
+            ]
+        }, 
+        {
+            "input": "&toea;", 
+            "description": "Named entity: toea; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2928"
+                ]
+            ]
+        }, 
+        {
+            "input": "&top", 
+            "description": "Bad named entity: top without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&top"
+                ]
+            ]
+        }, 
+        {
+            "input": "&top;", 
+            "description": "Named entity: top; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&topbot", 
+            "description": "Bad named entity: topbot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&topbot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&topbot;", 
+            "description": "Named entity: topbot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2336"
+                ]
+            ]
+        }, 
+        {
+            "input": "&topcir", 
+            "description": "Bad named entity: topcir without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&topcir"
+                ]
+            ]
+        }, 
+        {
+            "input": "&topcir;", 
+            "description": "Named entity: topcir; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2af1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&topf", 
+            "description": "Bad named entity: topf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&topf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&topf;", 
+            "description": "Named entity: topf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd65"
+                ]
+            ]
+        }, 
+        {
+            "input": "&topfork", 
+            "description": "Bad named entity: topfork without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&topfork"
+                ]
+            ]
+        }, 
+        {
+            "input": "&topfork;", 
+            "description": "Named entity: topfork; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ada"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tosa", 
+            "description": "Bad named entity: tosa without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tosa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tosa;", 
+            "description": "Named entity: tosa; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2929"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tprime", 
+            "description": "Bad named entity: tprime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tprime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tprime;", 
+            "description": "Named entity: tprime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2034"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trade", 
+            "description": "Bad named entity: trade without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&trade"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trade;", 
+            "description": "Named entity: trade; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2122"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triangle", 
+            "description": "Bad named entity: triangle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&triangle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triangle;", 
+            "description": "Named entity: triangle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triangledown", 
+            "description": "Bad named entity: triangledown without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&triangledown"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triangledown;", 
+            "description": "Named entity: triangledown; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25bf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triangleleft", 
+            "description": "Bad named entity: triangleleft without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&triangleleft"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triangleleft;", 
+            "description": "Named entity: triangleleft; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trianglelefteq", 
+            "description": "Bad named entity: trianglelefteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&trianglelefteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trianglelefteq;", 
+            "description": "Named entity: trianglelefteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triangleq", 
+            "description": "Bad named entity: triangleq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&triangleq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triangleq;", 
+            "description": "Named entity: triangleq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u225c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triangleright", 
+            "description": "Bad named entity: triangleright without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&triangleright"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triangleright;", 
+            "description": "Named entity: triangleright; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trianglerighteq", 
+            "description": "Bad named entity: trianglerighteq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&trianglerighteq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trianglerighteq;", 
+            "description": "Named entity: trianglerighteq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tridot", 
+            "description": "Bad named entity: tridot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tridot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tridot;", 
+            "description": "Named entity: tridot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25ec"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trie", 
+            "description": "Bad named entity: trie without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&trie"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trie;", 
+            "description": "Named entity: trie; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u225c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triminus", 
+            "description": "Bad named entity: triminus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&triminus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triminus;", 
+            "description": "Named entity: triminus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a3a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triplus", 
+            "description": "Bad named entity: triplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&triplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&triplus;", 
+            "description": "Named entity: triplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a39"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trisb", 
+            "description": "Bad named entity: trisb without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&trisb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trisb;", 
+            "description": "Named entity: trisb; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29cd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tritime", 
+            "description": "Bad named entity: tritime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tritime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tritime;", 
+            "description": "Named entity: tritime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a3b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trpezium", 
+            "description": "Bad named entity: trpezium without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&trpezium"
+                ]
+            ]
+        }, 
+        {
+            "input": "&trpezium;", 
+            "description": "Named entity: trpezium; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u23e2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tscr", 
+            "description": "Bad named entity: tscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tscr;", 
+            "description": "Named entity: tscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcc9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tscy", 
+            "description": "Bad named entity: tscy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tscy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tscy;", 
+            "description": "Named entity: tscy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0446"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tshcy", 
+            "description": "Bad named entity: tshcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tshcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tshcy;", 
+            "description": "Named entity: tshcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u045b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tstrok", 
+            "description": "Bad named entity: tstrok without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&tstrok"
+                ]
+            ]
+        }, 
+        {
+            "input": "&tstrok;", 
+            "description": "Named entity: tstrok; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0167"
+                ]
+            ]
+        }, 
+        {
+            "input": "&twixt", 
+            "description": "Bad named entity: twixt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&twixt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&twixt;", 
+            "description": "Named entity: twixt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u226c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&twoheadleftarrow", 
+            "description": "Bad named entity: twoheadleftarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&twoheadleftarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&twoheadleftarrow;", 
+            "description": "Named entity: twoheadleftarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u219e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&twoheadrightarrow", 
+            "description": "Bad named entity: twoheadrightarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&twoheadrightarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&twoheadrightarrow;", 
+            "description": "Named entity: twoheadrightarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21a0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uArr", 
+            "description": "Bad named entity: uArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uArr;", 
+            "description": "Named entity: uArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uHar", 
+            "description": "Bad named entity: uHar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uHar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uHar;", 
+            "description": "Named entity: uHar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2963"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uacute", 
+            "description": "Named entity: uacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00fa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uacute;", 
+            "description": "Named entity: uacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00fa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uarr", 
+            "description": "Bad named entity: uarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uarr;", 
+            "description": "Named entity: uarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2191"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ubrcy", 
+            "description": "Bad named entity: ubrcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ubrcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ubrcy;", 
+            "description": "Named entity: ubrcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u045e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ubreve", 
+            "description": "Bad named entity: ubreve without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ubreve"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ubreve;", 
+            "description": "Named entity: ubreve; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u016d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ucirc", 
+            "description": "Named entity: ucirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00fb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ucirc;", 
+            "description": "Named entity: ucirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00fb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ucy", 
+            "description": "Bad named entity: ucy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ucy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ucy;", 
+            "description": "Named entity: ucy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0443"
+                ]
+            ]
+        }, 
+        {
+            "input": "&udarr", 
+            "description": "Bad named entity: udarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&udarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&udarr;", 
+            "description": "Named entity: udarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&udblac", 
+            "description": "Bad named entity: udblac without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&udblac"
+                ]
+            ]
+        }, 
+        {
+            "input": "&udblac;", 
+            "description": "Named entity: udblac; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0171"
+                ]
+            ]
+        }, 
+        {
+            "input": "&udhar", 
+            "description": "Bad named entity: udhar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&udhar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&udhar;", 
+            "description": "Named entity: udhar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u296e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ufisht", 
+            "description": "Bad named entity: ufisht without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ufisht"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ufisht;", 
+            "description": "Named entity: ufisht; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u297e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ufr", 
+            "description": "Bad named entity: ufr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ufr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ufr;", 
+            "description": "Named entity: ufr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd32"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ugrave", 
+            "description": "Named entity: ugrave without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00f9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ugrave;", 
+            "description": "Named entity: ugrave; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00f9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uharl", 
+            "description": "Bad named entity: uharl without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uharl"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uharl;", 
+            "description": "Named entity: uharl; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uharr", 
+            "description": "Bad named entity: uharr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uharr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uharr;", 
+            "description": "Named entity: uharr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21be"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uhblk", 
+            "description": "Bad named entity: uhblk without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uhblk"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uhblk;", 
+            "description": "Named entity: uhblk; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2580"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ulcorn", 
+            "description": "Bad named entity: ulcorn without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ulcorn"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ulcorn;", 
+            "description": "Named entity: ulcorn; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u231c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ulcorner", 
+            "description": "Bad named entity: ulcorner without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ulcorner"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ulcorner;", 
+            "description": "Named entity: ulcorner; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u231c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ulcrop", 
+            "description": "Bad named entity: ulcrop without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ulcrop"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ulcrop;", 
+            "description": "Named entity: ulcrop; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u230f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ultri", 
+            "description": "Bad named entity: ultri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ultri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ultri;", 
+            "description": "Named entity: ultri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25f8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&umacr", 
+            "description": "Bad named entity: umacr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&umacr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&umacr;", 
+            "description": "Named entity: umacr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u016b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uml", 
+            "description": "Named entity: uml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uml;", 
+            "description": "Named entity: uml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uogon", 
+            "description": "Bad named entity: uogon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uogon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uogon;", 
+            "description": "Named entity: uogon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0173"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uopf", 
+            "description": "Bad named entity: uopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uopf;", 
+            "description": "Named entity: uopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd66"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uparrow", 
+            "description": "Bad named entity: uparrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uparrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uparrow;", 
+            "description": "Named entity: uparrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2191"
+                ]
+            ]
+        }, 
+        {
+            "input": "&updownarrow", 
+            "description": "Bad named entity: updownarrow without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&updownarrow"
+                ]
+            ]
+        }, 
+        {
+            "input": "&updownarrow;", 
+            "description": "Named entity: updownarrow; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2195"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upharpoonleft", 
+            "description": "Bad named entity: upharpoonleft without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&upharpoonleft"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upharpoonleft;", 
+            "description": "Named entity: upharpoonleft; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21bf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upharpoonright", 
+            "description": "Bad named entity: upharpoonright without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&upharpoonright"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upharpoonright;", 
+            "description": "Named entity: upharpoonright; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21be"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uplus", 
+            "description": "Bad named entity: uplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uplus;", 
+            "description": "Named entity: uplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upsi", 
+            "description": "Bad named entity: upsi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&upsi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upsi;", 
+            "description": "Named entity: upsi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upsih", 
+            "description": "Bad named entity: upsih without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&upsih"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upsih;", 
+            "description": "Named entity: upsih; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upsilon", 
+            "description": "Bad named entity: upsilon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&upsilon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upsilon;", 
+            "description": "Named entity: upsilon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upuparrows", 
+            "description": "Bad named entity: upuparrows without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&upuparrows"
+                ]
+            ]
+        }, 
+        {
+            "input": "&upuparrows;", 
+            "description": "Named entity: upuparrows; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&urcorn", 
+            "description": "Bad named entity: urcorn without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&urcorn"
+                ]
+            ]
+        }, 
+        {
+            "input": "&urcorn;", 
+            "description": "Named entity: urcorn; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u231d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&urcorner", 
+            "description": "Bad named entity: urcorner without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&urcorner"
+                ]
+            ]
+        }, 
+        {
+            "input": "&urcorner;", 
+            "description": "Named entity: urcorner; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u231d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&urcrop", 
+            "description": "Bad named entity: urcrop without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&urcrop"
+                ]
+            ]
+        }, 
+        {
+            "input": "&urcrop;", 
+            "description": "Named entity: urcrop; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u230e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uring", 
+            "description": "Bad named entity: uring without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uring"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uring;", 
+            "description": "Named entity: uring; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u016f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&urtri", 
+            "description": "Bad named entity: urtri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&urtri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&urtri;", 
+            "description": "Named entity: urtri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25f9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uscr", 
+            "description": "Bad named entity: uscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uscr;", 
+            "description": "Named entity: uscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcca"
+                ]
+            ]
+        }, 
+        {
+            "input": "&utdot", 
+            "description": "Bad named entity: utdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&utdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&utdot;", 
+            "description": "Named entity: utdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22f0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&utilde", 
+            "description": "Bad named entity: utilde without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&utilde"
+                ]
+            ]
+        }, 
+        {
+            "input": "&utilde;", 
+            "description": "Named entity: utilde; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0169"
+                ]
+            ]
+        }, 
+        {
+            "input": "&utri", 
+            "description": "Bad named entity: utri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&utri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&utri;", 
+            "description": "Named entity: utri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&utrif", 
+            "description": "Bad named entity: utrif without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&utrif"
+                ]
+            ]
+        }, 
+        {
+            "input": "&utrif;", 
+            "description": "Named entity: utrif; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b4"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uuarr", 
+            "description": "Bad named entity: uuarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uuarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uuarr;", 
+            "description": "Named entity: uuarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21c8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uuml", 
+            "description": "Named entity: uuml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00fc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uuml;", 
+            "description": "Named entity: uuml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00fc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uwangle", 
+            "description": "Bad named entity: uwangle without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&uwangle"
+                ]
+            ]
+        }, 
+        {
+            "input": "&uwangle;", 
+            "description": "Named entity: uwangle; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u29a7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vArr", 
+            "description": "Bad named entity: vArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vArr;", 
+            "description": "Named entity: vArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21d5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vBar", 
+            "description": "Bad named entity: vBar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vBar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vBar;", 
+            "description": "Named entity: vBar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ae8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vBarv", 
+            "description": "Bad named entity: vBarv without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vBarv"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vBarv;", 
+            "description": "Named entity: vBarv; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2ae9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vDash", 
+            "description": "Bad named entity: vDash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vDash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vDash;", 
+            "description": "Named entity: vDash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vangrt", 
+            "description": "Bad named entity: vangrt without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vangrt"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vangrt;", 
+            "description": "Named entity: vangrt; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u299c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varepsilon", 
+            "description": "Bad named entity: varepsilon without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varepsilon"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varepsilon;", 
+            "description": "Named entity: varepsilon; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varkappa", 
+            "description": "Bad named entity: varkappa without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varkappa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varkappa;", 
+            "description": "Named entity: varkappa; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03f0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varnothing", 
+            "description": "Bad named entity: varnothing without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varnothing"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varnothing;", 
+            "description": "Named entity: varnothing; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2205"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varphi", 
+            "description": "Bad named entity: varphi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varphi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varphi;", 
+            "description": "Named entity: varphi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03d5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varpi", 
+            "description": "Bad named entity: varpi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varpi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varpi;", 
+            "description": "Named entity: varpi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03d6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varpropto", 
+            "description": "Bad named entity: varpropto without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varpropto"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varpropto;", 
+            "description": "Named entity: varpropto; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u221d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varr", 
+            "description": "Bad named entity: varr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varr;", 
+            "description": "Named entity: varr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2195"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varrho", 
+            "description": "Bad named entity: varrho without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varrho"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varrho;", 
+            "description": "Named entity: varrho; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03f1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varsigma", 
+            "description": "Bad named entity: varsigma without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varsigma"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varsigma;", 
+            "description": "Named entity: varsigma; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varsubsetneq", 
+            "description": "Bad named entity: varsubsetneq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varsubsetneq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varsubsetneq;", 
+            "description": "Named entity: varsubsetneq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228a\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varsubsetneqq", 
+            "description": "Bad named entity: varsubsetneqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varsubsetneqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varsubsetneqq;", 
+            "description": "Named entity: varsubsetneqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2acb\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varsupsetneq", 
+            "description": "Bad named entity: varsupsetneq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varsupsetneq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varsupsetneq;", 
+            "description": "Named entity: varsupsetneq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228b\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varsupsetneqq", 
+            "description": "Bad named entity: varsupsetneqq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&varsupsetneqq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&varsupsetneqq;", 
+            "description": "Named entity: varsupsetneqq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2acc\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vartheta", 
+            "description": "Bad named entity: vartheta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vartheta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vartheta;", 
+            "description": "Named entity: vartheta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03d1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vartriangleleft", 
+            "description": "Bad named entity: vartriangleleft without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vartriangleleft"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vartriangleleft;", 
+            "description": "Named entity: vartriangleleft; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vartriangleright", 
+            "description": "Bad named entity: vartriangleright without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vartriangleright"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vartriangleright;", 
+            "description": "Named entity: vartriangleright; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vcy", 
+            "description": "Bad named entity: vcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vcy;", 
+            "description": "Named entity: vcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0432"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vdash", 
+            "description": "Bad named entity: vdash without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vdash"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vdash;", 
+            "description": "Named entity: vdash; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22a2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vee", 
+            "description": "Bad named entity: vee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vee;", 
+            "description": "Named entity: vee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2228"
+                ]
+            ]
+        }, 
+        {
+            "input": "&veebar", 
+            "description": "Bad named entity: veebar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&veebar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&veebar;", 
+            "description": "Named entity: veebar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22bb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&veeeq", 
+            "description": "Bad named entity: veeeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&veeeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&veeeq;", 
+            "description": "Named entity: veeeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u225a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vellip", 
+            "description": "Bad named entity: vellip without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vellip"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vellip;", 
+            "description": "Named entity: vellip; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22ee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&verbar", 
+            "description": "Bad named entity: verbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&verbar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&verbar;", 
+            "description": "Named entity: verbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "|"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vert", 
+            "description": "Bad named entity: vert without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vert"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vert;", 
+            "description": "Named entity: vert; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "|"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vfr", 
+            "description": "Bad named entity: vfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vfr;", 
+            "description": "Named entity: vfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd33"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vltri", 
+            "description": "Bad named entity: vltri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vltri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vltri;", 
+            "description": "Named entity: vltri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vnsub", 
+            "description": "Bad named entity: vnsub without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vnsub"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vnsub;", 
+            "description": "Named entity: vnsub; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2282\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vnsup", 
+            "description": "Bad named entity: vnsup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vnsup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vnsup;", 
+            "description": "Named entity: vnsup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2283\u20d2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vopf", 
+            "description": "Bad named entity: vopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vopf;", 
+            "description": "Named entity: vopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd67"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vprop", 
+            "description": "Bad named entity: vprop without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vprop"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vprop;", 
+            "description": "Named entity: vprop; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u221d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vrtri", 
+            "description": "Bad named entity: vrtri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vrtri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vrtri;", 
+            "description": "Named entity: vrtri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22b3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vscr", 
+            "description": "Bad named entity: vscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vscr;", 
+            "description": "Named entity: vscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udccb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vsubnE", 
+            "description": "Bad named entity: vsubnE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vsubnE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vsubnE;", 
+            "description": "Named entity: vsubnE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2acb\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vsubne", 
+            "description": "Bad named entity: vsubne without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vsubne"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vsubne;", 
+            "description": "Named entity: vsubne; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228a\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vsupnE", 
+            "description": "Bad named entity: vsupnE without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vsupnE"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vsupnE;", 
+            "description": "Named entity: vsupnE; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2acc\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vsupne", 
+            "description": "Bad named entity: vsupne without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vsupne"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vsupne;", 
+            "description": "Named entity: vsupne; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u228b\ufe00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vzigzag", 
+            "description": "Bad named entity: vzigzag without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&vzigzag"
+                ]
+            ]
+        }, 
+        {
+            "input": "&vzigzag;", 
+            "description": "Named entity: vzigzag; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u299a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wcirc", 
+            "description": "Bad named entity: wcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&wcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wcirc;", 
+            "description": "Named entity: wcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0175"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wedbar", 
+            "description": "Bad named entity: wedbar without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&wedbar"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wedbar;", 
+            "description": "Named entity: wedbar; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a5f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wedge", 
+            "description": "Bad named entity: wedge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&wedge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wedge;", 
+            "description": "Named entity: wedge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2227"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wedgeq", 
+            "description": "Bad named entity: wedgeq without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&wedgeq"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wedgeq;", 
+            "description": "Named entity: wedgeq; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2259"
+                ]
+            ]
+        }, 
+        {
+            "input": "&weierp", 
+            "description": "Bad named entity: weierp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&weierp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&weierp;", 
+            "description": "Named entity: weierp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2118"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wfr", 
+            "description": "Bad named entity: wfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&wfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wfr;", 
+            "description": "Named entity: wfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd34"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wopf", 
+            "description": "Bad named entity: wopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&wopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wopf;", 
+            "description": "Named entity: wopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd68"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wp", 
+            "description": "Bad named entity: wp without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&wp"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wp;", 
+            "description": "Named entity: wp; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2118"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wr", 
+            "description": "Bad named entity: wr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&wr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wr;", 
+            "description": "Named entity: wr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2240"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wreath", 
+            "description": "Bad named entity: wreath without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&wreath"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wreath;", 
+            "description": "Named entity: wreath; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2240"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wscr", 
+            "description": "Bad named entity: wscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&wscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&wscr;", 
+            "description": "Named entity: wscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udccc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xcap", 
+            "description": "Bad named entity: xcap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xcap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xcap;", 
+            "description": "Named entity: xcap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c2"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xcirc", 
+            "description": "Bad named entity: xcirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xcirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xcirc;", 
+            "description": "Named entity: xcirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25ef"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xcup", 
+            "description": "Bad named entity: xcup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xcup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xcup;", 
+            "description": "Named entity: xcup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xdtri", 
+            "description": "Bad named entity: xdtri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xdtri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xdtri;", 
+            "description": "Named entity: xdtri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25bd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xfr", 
+            "description": "Bad named entity: xfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xfr;", 
+            "description": "Named entity: xfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd35"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xhArr", 
+            "description": "Bad named entity: xhArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xhArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xhArr;", 
+            "description": "Named entity: xhArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27fa"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xharr", 
+            "description": "Bad named entity: xharr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xharr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xharr;", 
+            "description": "Named entity: xharr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f7"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xi", 
+            "description": "Bad named entity: xi without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xi"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xi;", 
+            "description": "Named entity: xi; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03be"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xlArr", 
+            "description": "Bad named entity: xlArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xlArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xlArr;", 
+            "description": "Named entity: xlArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f8"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xlarr", 
+            "description": "Bad named entity: xlarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xlarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xlarr;", 
+            "description": "Named entity: xlarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xmap", 
+            "description": "Bad named entity: xmap without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xmap"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xmap;", 
+            "description": "Named entity: xmap; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27fc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xnis", 
+            "description": "Bad named entity: xnis without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xnis"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xnis;", 
+            "description": "Named entity: xnis; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22fb"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xodot", 
+            "description": "Bad named entity: xodot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xodot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xodot;", 
+            "description": "Named entity: xodot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a00"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xopf", 
+            "description": "Bad named entity: xopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xopf;", 
+            "description": "Named entity: xopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd69"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xoplus", 
+            "description": "Bad named entity: xoplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xoplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xoplus;", 
+            "description": "Named entity: xoplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a01"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xotime", 
+            "description": "Bad named entity: xotime without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xotime"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xotime;", 
+            "description": "Named entity: xotime; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a02"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xrArr", 
+            "description": "Bad named entity: xrArr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xrArr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xrArr;", 
+            "description": "Named entity: xrArr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f9"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xrarr", 
+            "description": "Bad named entity: xrarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xrarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xrarr;", 
+            "description": "Named entity: xrarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u27f6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xscr", 
+            "description": "Bad named entity: xscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xscr;", 
+            "description": "Named entity: xscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udccd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xsqcup", 
+            "description": "Bad named entity: xsqcup without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xsqcup"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xsqcup;", 
+            "description": "Named entity: xsqcup; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a06"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xuplus", 
+            "description": "Bad named entity: xuplus without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xuplus"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xuplus;", 
+            "description": "Named entity: xuplus; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2a04"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xutri", 
+            "description": "Bad named entity: xutri without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xutri"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xutri;", 
+            "description": "Named entity: xutri; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u25b3"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xvee", 
+            "description": "Bad named entity: xvee without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xvee"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xvee;", 
+            "description": "Named entity: xvee; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c1"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xwedge", 
+            "description": "Bad named entity: xwedge without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&xwedge"
+                ]
+            ]
+        }, 
+        {
+            "input": "&xwedge;", 
+            "description": "Named entity: xwedge; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u22c0"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yacute", 
+            "description": "Named entity: yacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00fd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yacute;", 
+            "description": "Named entity: yacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00fd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yacy", 
+            "description": "Bad named entity: yacy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&yacy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yacy;", 
+            "description": "Named entity: yacy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u044f"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ycirc", 
+            "description": "Bad named entity: ycirc without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ycirc"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ycirc;", 
+            "description": "Named entity: ycirc; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0177"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ycy", 
+            "description": "Bad named entity: ycy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&ycy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&ycy;", 
+            "description": "Named entity: ycy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u044b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yen", 
+            "description": "Named entity: yen without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00a5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yen;", 
+            "description": "Named entity: yen; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00a5"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yfr", 
+            "description": "Bad named entity: yfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&yfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yfr;", 
+            "description": "Named entity: yfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd36"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yicy", 
+            "description": "Bad named entity: yicy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&yicy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yicy;", 
+            "description": "Named entity: yicy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0457"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yopf", 
+            "description": "Bad named entity: yopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&yopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yopf;", 
+            "description": "Named entity: yopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd6a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yscr", 
+            "description": "Bad named entity: yscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&yscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yscr;", 
+            "description": "Named entity: yscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udcce"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yucy", 
+            "description": "Bad named entity: yucy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&yucy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yucy;", 
+            "description": "Named entity: yucy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u044e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yuml", 
+            "description": "Named entity: yuml without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "\u00ff"
+                ]
+            ]
+        }, 
+        {
+            "input": "&yuml;", 
+            "description": "Named entity: yuml; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u00ff"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zacute", 
+            "description": "Bad named entity: zacute without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zacute"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zacute;", 
+            "description": "Named entity: zacute; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u017a"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zcaron", 
+            "description": "Bad named entity: zcaron without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zcaron"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zcaron;", 
+            "description": "Named entity: zcaron; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u017e"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zcy", 
+            "description": "Bad named entity: zcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zcy;", 
+            "description": "Named entity: zcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0437"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zdot", 
+            "description": "Bad named entity: zdot without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zdot"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zdot;", 
+            "description": "Named entity: zdot; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u017c"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zeetrf", 
+            "description": "Bad named entity: zeetrf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zeetrf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zeetrf;", 
+            "description": "Named entity: zeetrf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u2128"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zeta", 
+            "description": "Bad named entity: zeta without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zeta"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zeta;", 
+            "description": "Named entity: zeta; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u03b6"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zfr", 
+            "description": "Bad named entity: zfr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zfr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zfr;", 
+            "description": "Named entity: zfr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd37"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zhcy", 
+            "description": "Bad named entity: zhcy without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zhcy"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zhcy;", 
+            "description": "Named entity: zhcy; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u0436"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zigrarr", 
+            "description": "Bad named entity: zigrarr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zigrarr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zigrarr;", 
+            "description": "Named entity: zigrarr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u21dd"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zopf", 
+            "description": "Bad named entity: zopf without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zopf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zopf;", 
+            "description": "Named entity: zopf; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udd6b"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zscr", 
+            "description": "Bad named entity: zscr without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zscr"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zscr;", 
+            "description": "Named entity: zscr; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\ud835\udccf"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zwj", 
+            "description": "Bad named entity: zwj without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zwj"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zwj;", 
+            "description": "Named entity: zwj; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200d"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zwnj", 
+            "description": "Bad named entity: zwnj without a semi-colon", 
+            "output": [
+                "ParseError", 
+                [
+                    "Character", 
+                    "&zwnj"
+                ]
+            ]
+        }, 
+        {
+            "input": "&zwnj;", 
+            "description": "Named entity: zwnj; with a semi-colon", 
+            "output": [
+                [
+                    "Character", 
+                    "\u200c"
+                ]
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/html5lib/tests/testdata/tokenizer/numericEntities.test b/html5lib/tests/testdata/tokenizer/numericEntities.test
new file mode 100644
index 00000000..36c82281
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/numericEntities.test
@@ -0,0 +1,1313 @@
+{"tests": [
+
+{"description": "Invalid numeric entity character U+0000",
+"input": "&#x0000;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Invalid numeric entity character U+0001",
+"input": "",
+"output": ["ParseError", ["Character", "\u0001"]]},
+
+{"description": "Invalid numeric entity character U+0002",
+"input": "",
+"output": ["ParseError", ["Character", "\u0002"]]},
+
+{"description": "Invalid numeric entity character U+0003",
+"input": "",
+"output": ["ParseError", ["Character", "\u0003"]]},
+
+{"description": "Invalid numeric entity character U+0004",
+"input": "",
+"output": ["ParseError", ["Character", "\u0004"]]},
+
+{"description": "Invalid numeric entity character U+0005",
+"input": "&#x0005;",
+"output": ["ParseError", ["Character", "\u0005"]]},
+
+{"description": "Invalid numeric entity character U+0006",
+"input": "&#x0006;",
+"output": ["ParseError", ["Character", "\u0006"]]},
+
+{"description": "Invalid numeric entity character U+0007",
+"input": "&#x0007;",
+"output": ["ParseError", ["Character", "\u0007"]]},
+
+{"description": "Invalid numeric entity character U+0008",
+"input": "&#x0008;",
+"output": ["ParseError", ["Character", "\u0008"]]},
+
+{"description": "Invalid numeric entity character U+000B",
+"input": "&#x000b;",
+"output": ["ParseError", ["Character", "\u000b"]]},
+
+{"description": "Invalid numeric entity character U+000E",
+"input": "&#x000e;",
+"output": ["ParseError", ["Character", "\u000e"]]},
+
+{"description": "Invalid numeric entity character U+000F",
+"input": "&#x000f;",
+"output": ["ParseError", ["Character", "\u000f"]]},
+
+{"description": "Invalid numeric entity character U+0010",
+"input": "&#x0010;",
+"output": ["ParseError", ["Character", "\u0010"]]},
+
+{"description": "Invalid numeric entity character U+0011",
+"input": "&#x0011;",
+"output": ["ParseError", ["Character", "\u0011"]]},
+
+{"description": "Invalid numeric entity character U+0012",
+"input": "&#x0012;",
+"output": ["ParseError", ["Character", "\u0012"]]},
+
+{"description": "Invalid numeric entity character U+0013",
+"input": "&#x0013;",
+"output": ["ParseError", ["Character", "\u0013"]]},
+
+{"description": "Invalid numeric entity character U+0014",
+"input": "&#x0014;",
+"output": ["ParseError", ["Character", "\u0014"]]},
+
+{"description": "Invalid numeric entity character U+0015",
+"input": "&#x0015;",
+"output": ["ParseError", ["Character", "\u0015"]]},
+
+{"description": "Invalid numeric entity character U+0016",
+"input": "&#x0016;",
+"output": ["ParseError", ["Character", "\u0016"]]},
+
+{"description": "Invalid numeric entity character U+0017",
+"input": "&#x0017;",
+"output": ["ParseError", ["Character", "\u0017"]]},
+
+{"description": "Invalid numeric entity character U+0018",
+"input": "&#x0018;",
+"output": ["ParseError", ["Character", "\u0018"]]},
+
+{"description": "Invalid numeric entity character U+0019",
+"input": "&#x0019;",
+"output": ["ParseError", ["Character", "\u0019"]]},
+
+{"description": "Invalid numeric entity character U+001A",
+"input": "&#x001a;",
+"output": ["ParseError", ["Character", "\u001a"]]},
+
+{"description": "Invalid numeric entity character U+001B",
+"input": "&#x001b;",
+"output": ["ParseError", ["Character", "\u001b"]]},
+
+{"description": "Invalid numeric entity character U+001C",
+"input": "&#x001c;",
+"output": ["ParseError", ["Character", "\u001c"]]},
+
+{"description": "Invalid numeric entity character U+001D",
+"input": "&#x001d;",
+"output": ["ParseError", ["Character", "\u001d"]]},
+
+{"description": "Invalid numeric entity character U+001E",
+"input": "&#x001e;",
+"output": ["ParseError", ["Character", "\u001e"]]},
+
+{"description": "Invalid numeric entity character U+001F",
+"input": "&#x001f;",
+"output": ["ParseError", ["Character", "\u001f"]]},
+
+{"description": "Invalid numeric entity character U+007F",
+"input": "&#x007f;",
+"output": ["ParseError", ["Character", "\u007f"]]},
+
+{"description": "Invalid numeric entity character U+D800",
+"input": "&#xd800;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Invalid numeric entity character U+DFFF",
+"input": "&#xdfff;",
+"output": ["ParseError", ["Character", "\uFFFD"]]},
+
+{"description": "Invalid numeric entity character U+FDD0",
+"input": "&#xfdd0;",
+"output": ["ParseError", ["Character", "\ufdd0"]]},
+
+{"description": "Invalid numeric entity character U+FDD1",
+"input": "&#xfdd1;",
+"output": ["ParseError", ["Character", "\ufdd1"]]},
+
+{"description": "Invalid numeric entity character U+FDD2",
+"input": "&#xfdd2;",
+"output": ["ParseError", ["Character", "\ufdd2"]]},
+
+{"description": "Invalid numeric entity character U+FDD3",
+"input": "&#xfdd3;",
+"output": ["ParseError", ["Character", "\ufdd3"]]},
+
+{"description": "Invalid numeric entity character U+FDD4",
+"input": "&#xfdd4;",
+"output": ["ParseError", ["Character", "\ufdd4"]]},
+
+{"description": "Invalid numeric entity character U+FDD5",
+"input": "&#xfdd5;",
+"output": ["ParseError", ["Character", "\ufdd5"]]},
+
+{"description": "Invalid numeric entity character U+FDD6",
+"input": "&#xfdd6;",
+"output": ["ParseError", ["Character", "\ufdd6"]]},
+
+{"description": "Invalid numeric entity character U+FDD7",
+"input": "&#xfdd7;",
+"output": ["ParseError", ["Character", "\ufdd7"]]},
+
+{"description": "Invalid numeric entity character U+FDD8",
+"input": "&#xfdd8;",
+"output": ["ParseError", ["Character", "\ufdd8"]]},
+
+{"description": "Invalid numeric entity character U+FDD9",
+"input": "&#xfdd9;",
+"output": ["ParseError", ["Character", "\ufdd9"]]},
+
+{"description": "Invalid numeric entity character U+FDDA",
+"input": "&#xfdda;",
+"output": ["ParseError", ["Character", "\ufdda"]]},
+
+{"description": "Invalid numeric entity character U+FDDB",
+"input": "&#xfddb;",
+"output": ["ParseError", ["Character", "\ufddb"]]},
+
+{"description": "Invalid numeric entity character U+FDDC",
+"input": "&#xfddc;",
+"output": ["ParseError", ["Character", "\ufddc"]]},
+
+{"description": "Invalid numeric entity character U+FDDD",
+"input": "&#xfddd;",
+"output": ["ParseError", ["Character", "\ufddd"]]},
+
+{"description": "Invalid numeric entity character U+FDDE",
+"input": "&#xfdde;",
+"output": ["ParseError", ["Character", "\ufdde"]]},
+
+{"description": "Invalid numeric entity character U+FDDF",
+"input": "&#xfddf;",
+"output": ["ParseError", ["Character", "\ufddf"]]},
+
+{"description": "Invalid numeric entity character U+FDE0",
+"input": "&#xfde0;",
+"output": ["ParseError", ["Character", "\ufde0"]]},
+
+{"description": "Invalid numeric entity character U+FDE1",
+"input": "&#xfde1;",
+"output": ["ParseError", ["Character", "\ufde1"]]},
+
+{"description": "Invalid numeric entity character U+FDE2",
+"input": "&#xfde2;",
+"output": ["ParseError", ["Character", "\ufde2"]]},
+
+{"description": "Invalid numeric entity character U+FDE3",
+"input": "&#xfde3;",
+"output": ["ParseError", ["Character", "\ufde3"]]},
+
+{"description": "Invalid numeric entity character U+FDE4",
+"input": "&#xfde4;",
+"output": ["ParseError", ["Character", "\ufde4"]]},
+
+{"description": "Invalid numeric entity character U+FDE5",
+"input": "&#xfde5;",
+"output": ["ParseError", ["Character", "\ufde5"]]},
+
+{"description": "Invalid numeric entity character U+FDE6",
+"input": "&#xfde6;",
+"output": ["ParseError", ["Character", "\ufde6"]]},
+
+{"description": "Invalid numeric entity character U+FDE7",
+"input": "&#xfde7;",
+"output": ["ParseError", ["Character", "\ufde7"]]},
+
+{"description": "Invalid numeric entity character U+FDE8",
+"input": "&#xfde8;",
+"output": ["ParseError", ["Character", "\ufde8"]]},
+
+{"description": "Invalid numeric entity character U+FDE9",
+"input": "&#xfde9;",
+"output": ["ParseError", ["Character", "\ufde9"]]},
+
+{"description": "Invalid numeric entity character U+FDEA",
+"input": "&#xfdea;",
+"output": ["ParseError", ["Character", "\ufdea"]]},
+
+{"description": "Invalid numeric entity character U+FDEB",
+"input": "&#xfdeb;",
+"output": ["ParseError", ["Character", "\ufdeb"]]},
+
+{"description": "Invalid numeric entity character U+FDEC",
+"input": "&#xfdec;",
+"output": ["ParseError", ["Character", "\ufdec"]]},
+
+{"description": "Invalid numeric entity character U+FDED",
+"input": "&#xfded;",
+"output": ["ParseError", ["Character", "\ufded"]]},
+
+{"description": "Invalid numeric entity character U+FDEE",
+"input": "&#xfdee;",
+"output": ["ParseError", ["Character", "\ufdee"]]},
+
+{"description": "Invalid numeric entity character U+FDEF",
+"input": "&#xfdef;",
+"output": ["ParseError", ["Character", "\ufdef"]]},
+
+{"description": "Invalid numeric entity character U+FFFE",
+"input": "&#xfffe;",
+"output": ["ParseError", ["Character", "\ufffe"]]},
+
+{"description": "Invalid numeric entity character U+FFFF",
+"input": "&#xffff;",
+"output": ["ParseError", ["Character", "\uffff"]]},
+
+{"description": "Invalid numeric entity character U+1FFFE",
+"input": "&#x1fffe;",
+"output": ["ParseError", ["Character", "\uD83F\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+1FFFF",
+"input": "&#x1ffff;",
+"output": ["ParseError", ["Character", "\uD83F\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+2FFFE",
+"input": "&#x2fffe;",
+"output": ["ParseError", ["Character", "\uD87F\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+2FFFF",
+"input": "&#x2ffff;",
+"output": ["ParseError", ["Character", "\uD87F\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+3FFFE",
+"input": "&#x3fffe;",
+"output": ["ParseError", ["Character", "\uD8BF\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+3FFFF",
+"input": "&#x3ffff;",
+"output": ["ParseError", ["Character", "\uD8BF\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+4FFFE",
+"input": "&#x4fffe;",
+"output": ["ParseError", ["Character", "\uD8FF\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+4FFFF",
+"input": "&#x4ffff;",
+"output": ["ParseError", ["Character", "\uD8FF\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+5FFFE",
+"input": "&#x5fffe;",
+"output": ["ParseError", ["Character", "\uD93F\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+5FFFF",
+"input": "&#x5ffff;",
+"output": ["ParseError", ["Character", "\uD93F\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+6FFFE",
+"input": "&#x6fffe;",
+"output": ["ParseError", ["Character", "\uD97F\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+6FFFF",
+"input": "&#x6ffff;",
+"output": ["ParseError", ["Character", "\uD97F\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+7FFFE",
+"input": "&#x7fffe;",
+"output": ["ParseError", ["Character", "\uD9BF\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+7FFFF",
+"input": "&#x7ffff;",
+"output": ["ParseError", ["Character", "\uD9BF\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+8FFFE",
+"input": "&#x8fffe;",
+"output": ["ParseError", ["Character", "\uD9FF\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+8FFFF",
+"input": "&#x8ffff;",
+"output": ["ParseError", ["Character", "\uD9FF\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+9FFFE",
+"input": "&#x9fffe;",
+"output": ["ParseError", ["Character", "\uDA3F\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+9FFFF",
+"input": "&#x9ffff;",
+"output": ["ParseError", ["Character", "\uDA3F\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+AFFFE",
+"input": "&#xafffe;",
+"output": ["ParseError", ["Character", "\uDA7F\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+AFFFF",
+"input": "&#xaffff;",
+"output": ["ParseError", ["Character", "\uDA7F\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+BFFFE",
+"input": "&#xbfffe;",
+"output": ["ParseError", ["Character", "\uDABF\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+BFFFF",
+"input": "&#xbffff;",
+"output": ["ParseError", ["Character", "\uDABF\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+CFFFE",
+"input": "&#xcfffe;",
+"output": ["ParseError", ["Character", "\uDAFF\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+CFFFF",
+"input": "&#xcffff;",
+"output": ["ParseError", ["Character", "\uDAFF\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+DFFFE",
+"input": "&#xdfffe;",
+"output": ["ParseError", ["Character", "\uDB3F\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+DFFFF",
+"input": "&#xdffff;",
+"output": ["ParseError", ["Character", "\uDB3F\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+EFFFE",
+"input": "&#xefffe;",
+"output": ["ParseError", ["Character", "\uDB7F\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+EFFFF",
+"input": "&#xeffff;",
+"output": ["ParseError", ["Character", "\uDB7F\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+FFFFE",
+"input": "&#xffffe;",
+"output": ["ParseError", ["Character", "\uDBBF\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+FFFFF",
+"input": "&#xfffff;",
+"output": ["ParseError", ["Character", "\uDBBF\uDFFF"]]},
+
+{"description": "Invalid numeric entity character U+10FFFE",
+"input": "&#x10fffe;",
+"output": ["ParseError", ["Character", "\uDBFF\uDFFE"]]},
+
+{"description": "Invalid numeric entity character U+10FFFF",
+"input": "&#x10ffff;",
+"output": ["ParseError", ["Character", "\uDBFF\uDFFF"]]},
+
+{"description": "Valid numeric entity character U+0009",
+"input": "&#x0009;",
+"output": [["Character", "\u0009"]]},
+
+{"description": "Valid numeric entity character U+000A",
+"input": "&#x000a;",
+"output": [["Character", "\u000A"]]},
+
+{"description": "Valid numeric entity character U+0020",
+"input": "&#x0020;",
+"output": [["Character", "\u0020"]]},
+
+{"description": "Valid numeric entity character U+0021",
+"input": "&#x0021;",
+"output": [["Character", "\u0021"]]},
+
+{"description": "Valid numeric entity character U+0022",
+"input": "&#x0022;",
+"output": [["Character", "\u0022"]]},
+
+{"description": "Valid numeric entity character U+0023",
+"input": "&#x0023;",
+"output": [["Character", "\u0023"]]},
+
+{"description": "Valid numeric entity character U+0024",
+"input": "&#x0024;",
+"output": [["Character", "\u0024"]]},
+
+{"description": "Valid numeric entity character U+0025",
+"input": "&#x0025;",
+"output": [["Character", "\u0025"]]},
+
+{"description": "Valid numeric entity character U+0026",
+"input": "&#x0026;",
+"output": [["Character", "\u0026"]]},
+
+{"description": "Valid numeric entity character U+0027",
+"input": "&#x0027;",
+"output": [["Character", "\u0027"]]},
+
+{"description": "Valid numeric entity character U+0028",
+"input": "&#x0028;",
+"output": [["Character", "\u0028"]]},
+
+{"description": "Valid numeric entity character U+0029",
+"input": "&#x0029;",
+"output": [["Character", "\u0029"]]},
+
+{"description": "Valid numeric entity character U+002A",
+"input": "&#x002a;",
+"output": [["Character", "\u002A"]]},
+
+{"description": "Valid numeric entity character U+002B",
+"input": "&#x002b;",
+"output": [["Character", "\u002B"]]},
+
+{"description": "Valid numeric entity character U+002C",
+"input": "&#x002c;",
+"output": [["Character", "\u002C"]]},
+
+{"description": "Valid numeric entity character U+002D",
+"input": "&#x002d;",
+"output": [["Character", "\u002D"]]},
+
+{"description": "Valid numeric entity character U+002E",
+"input": "&#x002e;",
+"output": [["Character", "\u002E"]]},
+
+{"description": "Valid numeric entity character U+002F",
+"input": "&#x002f;",
+"output": [["Character", "\u002F"]]},
+
+{"description": "Valid numeric entity character U+0030",
+"input": "&#x0030;",
+"output": [["Character", "\u0030"]]},
+
+{"description": "Valid numeric entity character U+0031",
+"input": "&#x0031;",
+"output": [["Character", "\u0031"]]},
+
+{"description": "Valid numeric entity character U+0032",
+"input": "&#x0032;",
+"output": [["Character", "\u0032"]]},
+
+{"description": "Valid numeric entity character U+0033",
+"input": "&#x0033;",
+"output": [["Character", "\u0033"]]},
+
+{"description": "Valid numeric entity character U+0034",
+"input": "&#x0034;",
+"output": [["Character", "\u0034"]]},
+
+{"description": "Valid numeric entity character U+0035",
+"input": "&#x0035;",
+"output": [["Character", "\u0035"]]},
+
+{"description": "Valid numeric entity character U+0036",
+"input": "&#x0036;",
+"output": [["Character", "\u0036"]]},
+
+{"description": "Valid numeric entity character U+0037",
+"input": "&#x0037;",
+"output": [["Character", "\u0037"]]},
+
+{"description": "Valid numeric entity character U+0038",
+"input": "&#x0038;",
+"output": [["Character", "\u0038"]]},
+
+{"description": "Valid numeric entity character U+0039",
+"input": "&#x0039;",
+"output": [["Character", "\u0039"]]},
+
+{"description": "Valid numeric entity character U+003A",
+"input": "&#x003a;",
+"output": [["Character", "\u003A"]]},
+
+{"description": "Valid numeric entity character U+003B",
+"input": "&#x003b;",
+"output": [["Character", "\u003B"]]},
+
+{"description": "Valid numeric entity character U+003C",
+"input": "&#x003c;",
+"output": [["Character", "\u003C"]]},
+
+{"description": "Valid numeric entity character U+003D",
+"input": "&#x003d;",
+"output": [["Character", "\u003D"]]},
+
+{"description": "Valid numeric entity character U+003E",
+"input": "&#x003e;",
+"output": [["Character", "\u003E"]]},
+
+{"description": "Valid numeric entity character U+003F",
+"input": "&#x003f;",
+"output": [["Character", "\u003F"]]},
+
+{"description": "Valid numeric entity character U+0040",
+"input": "&#x0040;",
+"output": [["Character", "\u0040"]]},
+
+{"description": "Valid numeric entity character U+0041",
+"input": "&#x0041;",
+"output": [["Character", "\u0041"]]},
+
+{"description": "Valid numeric entity character U+0042",
+"input": "&#x0042;",
+"output": [["Character", "\u0042"]]},
+
+{"description": "Valid numeric entity character U+0043",
+"input": "&#x0043;",
+"output": [["Character", "\u0043"]]},
+
+{"description": "Valid numeric entity character U+0044",
+"input": "&#x0044;",
+"output": [["Character", "\u0044"]]},
+
+{"description": "Valid numeric entity character U+0045",
+"input": "&#x0045;",
+"output": [["Character", "\u0045"]]},
+
+{"description": "Valid numeric entity character U+0046",
+"input": "&#x0046;",
+"output": [["Character", "\u0046"]]},
+
+{"description": "Valid numeric entity character U+0047",
+"input": "&#x0047;",
+"output": [["Character", "\u0047"]]},
+
+{"description": "Valid numeric entity character U+0048",
+"input": "&#x0048;",
+"output": [["Character", "\u0048"]]},
+
+{"description": "Valid numeric entity character U+0049",
+"input": "&#x0049;",
+"output": [["Character", "\u0049"]]},
+
+{"description": "Valid numeric entity character U+004A",
+"input": "&#x004a;",
+"output": [["Character", "\u004A"]]},
+
+{"description": "Valid numeric entity character U+004B",
+"input": "&#x004b;",
+"output": [["Character", "\u004B"]]},
+
+{"description": "Valid numeric entity character U+004C",
+"input": "&#x004c;",
+"output": [["Character", "\u004C"]]},
+
+{"description": "Valid numeric entity character U+004D",
+"input": "&#x004d;",
+"output": [["Character", "\u004D"]]},
+
+{"description": "Valid numeric entity character U+004E",
+"input": "&#x004e;",
+"output": [["Character", "\u004E"]]},
+
+{"description": "Valid numeric entity character U+004F",
+"input": "&#x004f;",
+"output": [["Character", "\u004F"]]},
+
+{"description": "Valid numeric entity character U+0050",
+"input": "&#x0050;",
+"output": [["Character", "\u0050"]]},
+
+{"description": "Valid numeric entity character U+0051",
+"input": "&#x0051;",
+"output": [["Character", "\u0051"]]},
+
+{"description": "Valid numeric entity character U+0052",
+"input": "&#x0052;",
+"output": [["Character", "\u0052"]]},
+
+{"description": "Valid numeric entity character U+0053",
+"input": "&#x0053;",
+"output": [["Character", "\u0053"]]},
+
+{"description": "Valid numeric entity character U+0054",
+"input": "&#x0054;",
+"output": [["Character", "\u0054"]]},
+
+{"description": "Valid numeric entity character U+0055",
+"input": "&#x0055;",
+"output": [["Character", "\u0055"]]},
+
+{"description": "Valid numeric entity character U+0056",
+"input": "&#x0056;",
+"output": [["Character", "\u0056"]]},
+
+{"description": "Valid numeric entity character U+0057",
+"input": "&#x0057;",
+"output": [["Character", "\u0057"]]},
+
+{"description": "Valid numeric entity character U+0058",
+"input": "&#x0058;",
+"output": [["Character", "\u0058"]]},
+
+{"description": "Valid numeric entity character U+0059",
+"input": "&#x0059;",
+"output": [["Character", "\u0059"]]},
+
+{"description": "Valid numeric entity character U+005A",
+"input": "&#x005a;",
+"output": [["Character", "\u005A"]]},
+
+{"description": "Valid numeric entity character U+005B",
+"input": "&#x005b;",
+"output": [["Character", "\u005B"]]},
+
+{"description": "Valid numeric entity character U+005C",
+"input": "&#x005c;",
+"output": [["Character", "\u005C"]]},
+
+{"description": "Valid numeric entity character U+005D",
+"input": "&#x005d;",
+"output": [["Character", "\u005D"]]},
+
+{"description": "Valid numeric entity character U+005E",
+"input": "&#x005e;",
+"output": [["Character", "\u005E"]]},
+
+{"description": "Valid numeric entity character U+005F",
+"input": "&#x005f;",
+"output": [["Character", "\u005F"]]},
+
+{"description": "Valid numeric entity character U+0060",
+"input": "&#x0060;",
+"output": [["Character", "\u0060"]]},
+
+{"description": "Valid numeric entity character U+0061",
+"input": "&#x0061;",
+"output": [["Character", "\u0061"]]},
+
+{"description": "Valid numeric entity character U+0062",
+"input": "&#x0062;",
+"output": [["Character", "\u0062"]]},
+
+{"description": "Valid numeric entity character U+0063",
+"input": "&#x0063;",
+"output": [["Character", "\u0063"]]},
+
+{"description": "Valid numeric entity character U+0064",
+"input": "&#x0064;",
+"output": [["Character", "\u0064"]]},
+
+{"description": "Valid numeric entity character U+0065",
+"input": "&#x0065;",
+"output": [["Character", "\u0065"]]},
+
+{"description": "Valid numeric entity character U+0066",
+"input": "&#x0066;",
+"output": [["Character", "\u0066"]]},
+
+{"description": "Valid numeric entity character U+0067",
+"input": "&#x0067;",
+"output": [["Character", "\u0067"]]},
+
+{"description": "Valid numeric entity character U+0068",
+"input": "&#x0068;",
+"output": [["Character", "\u0068"]]},
+
+{"description": "Valid numeric entity character U+0069",
+"input": "&#x0069;",
+"output": [["Character", "\u0069"]]},
+
+{"description": "Valid numeric entity character U+006A",
+"input": "&#x006a;",
+"output": [["Character", "\u006A"]]},
+
+{"description": "Valid numeric entity character U+006B",
+"input": "&#x006b;",
+"output": [["Character", "\u006B"]]},
+
+{"description": "Valid numeric entity character U+006C",
+"input": "&#x006c;",
+"output": [["Character", "\u006C"]]},
+
+{"description": "Valid numeric entity character U+006D",
+"input": "&#x006d;",
+"output": [["Character", "\u006D"]]},
+
+{"description": "Valid numeric entity character U+006E",
+"input": "&#x006e;",
+"output": [["Character", "\u006E"]]},
+
+{"description": "Valid numeric entity character U+006F",
+"input": "&#x006f;",
+"output": [["Character", "\u006F"]]},
+
+{"description": "Valid numeric entity character U+0070",
+"input": "&#x0070;",
+"output": [["Character", "\u0070"]]},
+
+{"description": "Valid numeric entity character U+0071",
+"input": "&#x0071;",
+"output": [["Character", "\u0071"]]},
+
+{"description": "Valid numeric entity character U+0072",
+"input": "&#x0072;",
+"output": [["Character", "\u0072"]]},
+
+{"description": "Valid numeric entity character U+0073",
+"input": "&#x0073;",
+"output": [["Character", "\u0073"]]},
+
+{"description": "Valid numeric entity character U+0074",
+"input": "&#x0074;",
+"output": [["Character", "\u0074"]]},
+
+{"description": "Valid numeric entity character U+0075",
+"input": "&#x0075;",
+"output": [["Character", "\u0075"]]},
+
+{"description": "Valid numeric entity character U+0076",
+"input": "&#x0076;",
+"output": [["Character", "\u0076"]]},
+
+{"description": "Valid numeric entity character U+0077",
+"input": "&#x0077;",
+"output": [["Character", "\u0077"]]},
+
+{"description": "Valid numeric entity character U+0078",
+"input": "&#x0078;",
+"output": [["Character", "\u0078"]]},
+
+{"description": "Valid numeric entity character U+0079",
+"input": "&#x0079;",
+"output": [["Character", "\u0079"]]},
+
+{"description": "Valid numeric entity character U+007A",
+"input": "&#x007a;",
+"output": [["Character", "\u007A"]]},
+
+{"description": "Valid numeric entity character U+007B",
+"input": "&#x007b;",
+"output": [["Character", "\u007B"]]},
+
+{"description": "Valid numeric entity character U+007C",
+"input": "&#x007c;",
+"output": [["Character", "\u007C"]]},
+
+{"description": "Valid numeric entity character U+007D",
+"input": "&#x007d;",
+"output": [["Character", "\u007D"]]},
+
+{"description": "Valid numeric entity character U+007E",
+"input": "&#x007e;",
+"output": [["Character", "\u007E"]]},
+
+{"description": "Valid numeric entity character U+00A0",
+"input": "&#x00a0;",
+"output": [["Character", "\u00A0"]]},
+
+{"description": "Valid numeric entity character U+00A1",
+"input": "&#x00a1;",
+"output": [["Character", "\u00A1"]]},
+
+{"description": "Valid numeric entity character U+00A2",
+"input": "&#x00a2;",
+"output": [["Character", "\u00A2"]]},
+
+{"description": "Valid numeric entity character U+00A3",
+"input": "&#x00a3;",
+"output": [["Character", "\u00A3"]]},
+
+{"description": "Valid numeric entity character U+00A4",
+"input": "&#x00a4;",
+"output": [["Character", "\u00A4"]]},
+
+{"description": "Valid numeric entity character U+00A5",
+"input": "&#x00a5;",
+"output": [["Character", "\u00A5"]]},
+
+{"description": "Valid numeric entity character U+00A6",
+"input": "&#x00a6;",
+"output": [["Character", "\u00A6"]]},
+
+{"description": "Valid numeric entity character U+00A7",
+"input": "&#x00a7;",
+"output": [["Character", "\u00A7"]]},
+
+{"description": "Valid numeric entity character U+00A8",
+"input": "&#x00a8;",
+"output": [["Character", "\u00A8"]]},
+
+{"description": "Valid numeric entity character U+00A9",
+"input": "&#x00a9;",
+"output": [["Character", "\u00A9"]]},
+
+{"description": "Valid numeric entity character U+00AA",
+"input": "&#x00aa;",
+"output": [["Character", "\u00AA"]]},
+
+{"description": "Valid numeric entity character U+00AB",
+"input": "&#x00ab;",
+"output": [["Character", "\u00AB"]]},
+
+{"description": "Valid numeric entity character U+00AC",
+"input": "&#x00ac;",
+"output": [["Character", "\u00AC"]]},
+
+{"description": "Valid numeric entity character U+00AD",
+"input": "&#x00ad;",
+"output": [["Character", "\u00AD"]]},
+
+{"description": "Valid numeric entity character U+00AE",
+"input": "&#x00ae;",
+"output": [["Character", "\u00AE"]]},
+
+{"description": "Valid numeric entity character U+00AF",
+"input": "&#x00af;",
+"output": [["Character", "\u00AF"]]},
+
+{"description": "Valid numeric entity character U+00B0",
+"input": "&#x00b0;",
+"output": [["Character", "\u00B0"]]},
+
+{"description": "Valid numeric entity character U+00B1",
+"input": "&#x00b1;",
+"output": [["Character", "\u00B1"]]},
+
+{"description": "Valid numeric entity character U+00B2",
+"input": "&#x00b2;",
+"output": [["Character", "\u00B2"]]},
+
+{"description": "Valid numeric entity character U+00B3",
+"input": "&#x00b3;",
+"output": [["Character", "\u00B3"]]},
+
+{"description": "Valid numeric entity character U+00B4",
+"input": "&#x00b4;",
+"output": [["Character", "\u00B4"]]},
+
+{"description": "Valid numeric entity character U+00B5",
+"input": "&#x00b5;",
+"output": [["Character", "\u00B5"]]},
+
+{"description": "Valid numeric entity character U+00B6",
+"input": "&#x00b6;",
+"output": [["Character", "\u00B6"]]},
+
+{"description": "Valid numeric entity character U+00B7",
+"input": "&#x00b7;",
+"output": [["Character", "\u00B7"]]},
+
+{"description": "Valid numeric entity character U+00B8",
+"input": "&#x00b8;",
+"output": [["Character", "\u00B8"]]},
+
+{"description": "Valid numeric entity character U+00B9",
+"input": "&#x00b9;",
+"output": [["Character", "\u00B9"]]},
+
+{"description": "Valid numeric entity character U+00BA",
+"input": "&#x00ba;",
+"output": [["Character", "\u00BA"]]},
+
+{"description": "Valid numeric entity character U+00BB",
+"input": "&#x00bb;",
+"output": [["Character", "\u00BB"]]},
+
+{"description": "Valid numeric entity character U+00BC",
+"input": "&#x00bc;",
+"output": [["Character", "\u00BC"]]},
+
+{"description": "Valid numeric entity character U+00BD",
+"input": "&#x00bd;",
+"output": [["Character", "\u00BD"]]},
+
+{"description": "Valid numeric entity character U+00BE",
+"input": "&#x00be;",
+"output": [["Character", "\u00BE"]]},
+
+{"description": "Valid numeric entity character U+00BF",
+"input": "&#x00bf;",
+"output": [["Character", "\u00BF"]]},
+
+{"description": "Valid numeric entity character U+00C0",
+"input": "&#x00c0;",
+"output": [["Character", "\u00C0"]]},
+
+{"description": "Valid numeric entity character U+00C1",
+"input": "&#x00c1;",
+"output": [["Character", "\u00C1"]]},
+
+{"description": "Valid numeric entity character U+00C2",
+"input": "&#x00c2;",
+"output": [["Character", "\u00C2"]]},
+
+{"description": "Valid numeric entity character U+00C3",
+"input": "&#x00c3;",
+"output": [["Character", "\u00C3"]]},
+
+{"description": "Valid numeric entity character U+00C4",
+"input": "&#x00c4;",
+"output": [["Character", "\u00C4"]]},
+
+{"description": "Valid numeric entity character U+00C5",
+"input": "&#x00c5;",
+"output": [["Character", "\u00C5"]]},
+
+{"description": "Valid numeric entity character U+00C6",
+"input": "&#x00c6;",
+"output": [["Character", "\u00C6"]]},
+
+{"description": "Valid numeric entity character U+00C7",
+"input": "&#x00c7;",
+"output": [["Character", "\u00C7"]]},
+
+{"description": "Valid numeric entity character U+00C8",
+"input": "&#x00c8;",
+"output": [["Character", "\u00C8"]]},
+
+{"description": "Valid numeric entity character U+00C9",
+"input": "&#x00c9;",
+"output": [["Character", "\u00C9"]]},
+
+{"description": "Valid numeric entity character U+00CA",
+"input": "&#x00ca;",
+"output": [["Character", "\u00CA"]]},
+
+{"description": "Valid numeric entity character U+00CB",
+"input": "&#x00cb;",
+"output": [["Character", "\u00CB"]]},
+
+{"description": "Valid numeric entity character U+00CC",
+"input": "&#x00cc;",
+"output": [["Character", "\u00CC"]]},
+
+{"description": "Valid numeric entity character U+00CD",
+"input": "&#x00cd;",
+"output": [["Character", "\u00CD"]]},
+
+{"description": "Valid numeric entity character U+00CE",
+"input": "&#x00ce;",
+"output": [["Character", "\u00CE"]]},
+
+{"description": "Valid numeric entity character U+00CF",
+"input": "&#x00cf;",
+"output": [["Character", "\u00CF"]]},
+
+{"description": "Valid numeric entity character U+00D0",
+"input": "&#x00d0;",
+"output": [["Character", "\u00D0"]]},
+
+{"description": "Valid numeric entity character U+00D1",
+"input": "&#x00d1;",
+"output": [["Character", "\u00D1"]]},
+
+{"description": "Valid numeric entity character U+00D2",
+"input": "&#x00d2;",
+"output": [["Character", "\u00D2"]]},
+
+{"description": "Valid numeric entity character U+00D3",
+"input": "&#x00d3;",
+"output": [["Character", "\u00D3"]]},
+
+{"description": "Valid numeric entity character U+00D4",
+"input": "&#x00d4;",
+"output": [["Character", "\u00D4"]]},
+
+{"description": "Valid numeric entity character U+00D5",
+"input": "&#x00d5;",
+"output": [["Character", "\u00D5"]]},
+
+{"description": "Valid numeric entity character U+00D6",
+"input": "&#x00d6;",
+"output": [["Character", "\u00D6"]]},
+
+{"description": "Valid numeric entity character U+00D7",
+"input": "&#x00d7;",
+"output": [["Character", "\u00D7"]]},
+
+{"description": "Valid numeric entity character U+00D8",
+"input": "&#x00d8;",
+"output": [["Character", "\u00D8"]]},
+
+{"description": "Valid numeric entity character U+00D9",
+"input": "&#x00d9;",
+"output": [["Character", "\u00D9"]]},
+
+{"description": "Valid numeric entity character U+00DA",
+"input": "&#x00da;",
+"output": [["Character", "\u00DA"]]},
+
+{"description": "Valid numeric entity character U+00DB",
+"input": "&#x00db;",
+"output": [["Character", "\u00DB"]]},
+
+{"description": "Valid numeric entity character U+00DC",
+"input": "&#x00dc;",
+"output": [["Character", "\u00DC"]]},
+
+{"description": "Valid numeric entity character U+00DD",
+"input": "&#x00dd;",
+"output": [["Character", "\u00DD"]]},
+
+{"description": "Valid numeric entity character U+00DE",
+"input": "&#x00de;",
+"output": [["Character", "\u00DE"]]},
+
+{"description": "Valid numeric entity character U+00DF",
+"input": "&#x00df;",
+"output": [["Character", "\u00DF"]]},
+
+{"description": "Valid numeric entity character U+00E0",
+"input": "&#x00e0;",
+"output": [["Character", "\u00E0"]]},
+
+{"description": "Valid numeric entity character U+00E1",
+"input": "&#x00e1;",
+"output": [["Character", "\u00E1"]]},
+
+{"description": "Valid numeric entity character U+00E2",
+"input": "&#x00e2;",
+"output": [["Character", "\u00E2"]]},
+
+{"description": "Valid numeric entity character U+00E3",
+"input": "&#x00e3;",
+"output": [["Character", "\u00E3"]]},
+
+{"description": "Valid numeric entity character U+00E4",
+"input": "&#x00e4;",
+"output": [["Character", "\u00E4"]]},
+
+{"description": "Valid numeric entity character U+00E5",
+"input": "&#x00e5;",
+"output": [["Character", "\u00E5"]]},
+
+{"description": "Valid numeric entity character U+00E6",
+"input": "&#x00e6;",
+"output": [["Character", "\u00E6"]]},
+
+{"description": "Valid numeric entity character U+00E7",
+"input": "&#x00e7;",
+"output": [["Character", "\u00E7"]]},
+
+{"description": "Valid numeric entity character U+00E8",
+"input": "&#x00e8;",
+"output": [["Character", "\u00E8"]]},
+
+{"description": "Valid numeric entity character U+00E9",
+"input": "&#x00e9;",
+"output": [["Character", "\u00E9"]]},
+
+{"description": "Valid numeric entity character U+00EA",
+"input": "&#x00ea;",
+"output": [["Character", "\u00EA"]]},
+
+{"description": "Valid numeric entity character U+00EB",
+"input": "&#x00eb;",
+"output": [["Character", "\u00EB"]]},
+
+{"description": "Valid numeric entity character U+00EC",
+"input": "&#x00ec;",
+"output": [["Character", "\u00EC"]]},
+
+{"description": "Valid numeric entity character U+00ED",
+"input": "&#x00ed;",
+"output": [["Character", "\u00ED"]]},
+
+{"description": "Valid numeric entity character U+00EE",
+"input": "&#x00ee;",
+"output": [["Character", "\u00EE"]]},
+
+{"description": "Valid numeric entity character U+00EF",
+"input": "&#x00ef;",
+"output": [["Character", "\u00EF"]]},
+
+{"description": "Valid numeric entity character U+00F0",
+"input": "&#x00f0;",
+"output": [["Character", "\u00F0"]]},
+
+{"description": "Valid numeric entity character U+00F1",
+"input": "&#x00f1;",
+"output": [["Character", "\u00F1"]]},
+
+{"description": "Valid numeric entity character U+00F2",
+"input": "&#x00f2;",
+"output": [["Character", "\u00F2"]]},
+
+{"description": "Valid numeric entity character U+00F3",
+"input": "&#x00f3;",
+"output": [["Character", "\u00F3"]]},
+
+{"description": "Valid numeric entity character U+00F4",
+"input": "&#x00f4;",
+"output": [["Character", "\u00F4"]]},
+
+{"description": "Valid numeric entity character U+00F5",
+"input": "&#x00f5;",
+"output": [["Character", "\u00F5"]]},
+
+{"description": "Valid numeric entity character U+00F6",
+"input": "&#x00f6;",
+"output": [["Character", "\u00F6"]]},
+
+{"description": "Valid numeric entity character U+00F7",
+"input": "&#x00f7;",
+"output": [["Character", "\u00F7"]]},
+
+{"description": "Valid numeric entity character U+00F8",
+"input": "&#x00f8;",
+"output": [["Character", "\u00F8"]]},
+
+{"description": "Valid numeric entity character U+00F9",
+"input": "&#x00f9;",
+"output": [["Character", "\u00F9"]]},
+
+{"description": "Valid numeric entity character U+00FA",
+"input": "&#x00fa;",
+"output": [["Character", "\u00FA"]]},
+
+{"description": "Valid numeric entity character U+00FB",
+"input": "&#x00fb;",
+"output": [["Character", "\u00FB"]]},
+
+{"description": "Valid numeric entity character U+00FC",
+"input": "&#x00fc;",
+"output": [["Character", "\u00FC"]]},
+
+{"description": "Valid numeric entity character U+00FD",
+"input": "&#x00fd;",
+"output": [["Character", "\u00FD"]]},
+
+{"description": "Valid numeric entity character U+00FE",
+"input": "&#x00fe;",
+"output": [["Character", "\u00FE"]]},
+
+{"description": "Valid numeric entity character U+00FF",
+"input": "&#x00ff;",
+"output": [["Character", "\u00FF"]]},
+
+{"description": "Valid numeric entity character U+D7FF",
+"input": "&#xd7ff;",
+"output": [["Character", "\uD7FF"]]},
+
+{"description": "Valid numeric entity character U+E000",
+"input": "&#xe000;",
+"output": [["Character", "\uE000"]]},
+
+{"description": "Valid numeric entity character U+FDCF",
+"input": "&#xfdcf;",
+"output": [["Character", "\uFDCF"]]},
+
+{"description": "Valid numeric entity character U+FDF0",
+"input": "&#xfdf0;",
+"output": [["Character", "\uFDF0"]]},
+
+{"description": "Valid numeric entity character U+FFFD",
+"input": "&#xfffd;",
+"output": [["Character", "\uFFFD"]]},
+
+{"description": "Valid numeric entity character U+10000",
+"input": "&#x10000;",
+"output": [["Character", "\uD800\uDC00"]]},
+
+{"description": "Valid numeric entity character U+1FFFD",
+"input": "&#x1fffd;",
+"output": [["Character", "\uD83F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+20000",
+"input": "&#x20000;",
+"output": [["Character", "\uD840\uDC00"]]},
+
+{"description": "Valid numeric entity character U+2FFFD",
+"input": "&#x2fffd;",
+"output": [["Character", "\uD87F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+30000",
+"input": "&#x30000;",
+"output": [["Character", "\uD880\uDC00"]]},
+
+{"description": "Valid numeric entity character U+3FFFD",
+"input": "&#x3fffd;",
+"output": [["Character", "\uD8BF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+40000",
+"input": "&#x40000;",
+"output": [["Character", "\uD8C0\uDC00"]]},
+
+{"description": "Valid numeric entity character U+4FFFD",
+"input": "&#x4fffd;",
+"output": [["Character", "\uD8FF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+50000",
+"input": "&#x50000;",
+"output": [["Character", "\uD900\uDC00"]]},
+
+{"description": "Valid numeric entity character U+5FFFD",
+"input": "&#x5fffd;",
+"output": [["Character", "\uD93F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+60000",
+"input": "&#x60000;",
+"output": [["Character", "\uD940\uDC00"]]},
+
+{"description": "Valid numeric entity character U+6FFFD",
+"input": "&#x6fffd;",
+"output": [["Character", "\uD97F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+70000",
+"input": "&#x70000;",
+"output": [["Character", "\uD980\uDC00"]]},
+
+{"description": "Valid numeric entity character U+7FFFD",
+"input": "&#x7fffd;",
+"output": [["Character", "\uD9BF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+80000",
+"input": "&#x80000;",
+"output": [["Character", "\uD9C0\uDC00"]]},
+
+{"description": "Valid numeric entity character U+8FFFD",
+"input": "&#x8fffd;",
+"output": [["Character", "\uD9FF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+90000",
+"input": "&#x90000;",
+"output": [["Character", "\uDA00\uDC00"]]},
+
+{"description": "Valid numeric entity character U+9FFFD",
+"input": "&#x9fffd;",
+"output": [["Character", "\uDA3F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+A0000",
+"input": "&#xa0000;",
+"output": [["Character", "\uDA40\uDC00"]]},
+
+{"description": "Valid numeric entity character U+AFFFD",
+"input": "&#xafffd;",
+"output": [["Character", "\uDA7F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+B0000",
+"input": "&#xb0000;",
+"output": [["Character", "\uDA80\uDC00"]]},
+
+{"description": "Valid numeric entity character U+BFFFD",
+"input": "&#xbfffd;",
+"output": [["Character", "\uDABF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+C0000",
+"input": "&#xc0000;",
+"output": [["Character", "\uDAC0\uDC00"]]},
+
+{"description": "Valid numeric entity character U+CFFFD",
+"input": "&#xcfffd;",
+"output": [["Character", "\uDAFF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+D0000",
+"input": "&#xd0000;",
+"output": [["Character", "\uDB00\uDC00"]]},
+
+{"description": "Valid numeric entity character U+DFFFD",
+"input": "&#xdfffd;",
+"output": [["Character", "\uDB3F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+E0000",
+"input": "&#xe0000;",
+"output": [["Character", "\uDB40\uDC00"]]},
+
+{"description": "Valid numeric entity character U+EFFFD",
+"input": "&#xefffd;",
+"output": [["Character", "\uDB7F\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+F0000",
+"input": "&#xf0000;",
+"output": [["Character", "\uDB80\uDC00"]]},
+
+{"description": "Valid numeric entity character U+FFFFD",
+"input": "&#xffffd;",
+"output": [["Character", "\uDBBF\uDFFD"]]},
+
+{"description": "Valid numeric entity character U+100000",
+"input": "&#x100000;",
+"output": [["Character", "\uDBC0\uDC00"]]},
+
+{"description": "Valid numeric entity character U+10FFFD",
+"input": "&#x10fffd;",
+"output": [["Character", "\uDBFF\uDFFD"]]}
+
+]}
+
+
diff --git a/html5lib/tests/testdata/tokenizer/pendingSpecChanges.test b/html5lib/tests/testdata/tokenizer/pendingSpecChanges.test
new file mode 100644
index 00000000..1b7dc3c7
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/pendingSpecChanges.test
@@ -0,0 +1,7 @@
+{"tests": [
+
+{"description":"<!---- >",
+"input":"<!---- >",
+"output":["ParseError", "ParseError", ["Comment","-- >"]]}
+
+]}
diff --git a/html5lib/tests/testdata/tokenizer/test1.test b/html5lib/tests/testdata/tokenizer/test1.test
new file mode 100644
index 00000000..5de66f54
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/test1.test
@@ -0,0 +1,196 @@
+{"tests": [
+
+{"description":"Correct Doctype lowercase",
+"input":"<!DOCTYPE html>",
+"output":[["DOCTYPE", "html", null, null, true]]},
+
+{"description":"Correct Doctype uppercase",
+"input":"<!DOCTYPE HTML>",
+"output":[["DOCTYPE", "html", null, null, true]]},
+
+{"description":"Correct Doctype mixed case",
+"input":"<!DOCTYPE HtMl>", 
+"output":[["DOCTYPE", "html", null, null, true]]},
+
+{"description":"Correct Doctype case with EOF",
+"input":"<!DOCTYPE HtMl", 
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Truncated doctype start",
+"input":"<!DOC>", 
+"output":["ParseError", ["Comment", "DOC"]]},
+
+{"description":"Doctype in error",
+"input":"<!DOCTYPE foo>", 
+"output":[["DOCTYPE", "foo", null, null, true]]},
+
+{"description":"Single Start Tag",
+"input":"<h>",
+"output":[["StartTag", "h", {}]]},
+
+{"description":"Empty end tag",
+"input":"</>",
+"output":["ParseError"]},
+
+{"description":"Empty start tag",
+"input":"<>",
+"output":["ParseError", ["Character", "<>"]]},
+
+{"description":"Start Tag w/attribute",
+"input":"<h a='b'>",
+"output":[["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Start Tag w/attribute no quotes",
+"input":"<h a=b>",
+"output":[["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Start/End Tag",
+"input":"<h></h>",
+"output":[["StartTag", "h", {}], ["EndTag", "h"]]},
+
+{"description":"Two unclosed start tags",
+"input":"<p>One<p>Two",
+"output":[["StartTag", "p", {}], ["Character", "One"], ["StartTag", "p", {}], ["Character", "Two"]]},
+
+{"description":"End Tag w/attribute",
+"input":"<h></h a='b'>",
+"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]},
+
+{"description":"Multiple atts",
+"input":"<h a='b' c='d'>",
+"output":[["StartTag", "h", {"a":"b", "c":"d"}]]},
+
+{"description":"Multiple atts no space",
+"input":"<h a='b'c='d'>",
+"output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]]},
+
+{"description":"Repeated attr",
+ "input":"<h a='b' a='d'>",
+ "output":["ParseError", ["StartTag", "h", {"a":"b"}]]},
+
+{"description":"Simple comment",
+ "input":"<!--comment-->",
+ "output":[["Comment", "comment"]]},
+
+{"description":"Comment, Central dash no space",
+ "input":"<!----->",
+ "output":["ParseError", ["Comment", "-"]]},
+
+{"description":"Comment, two central dashes",
+"input":"<!-- --comment -->",
+"output":["ParseError", ["Comment", " --comment "]]},
+
+{"description":"Unfinished comment",
+"input":"<!--comment",
+"output":["ParseError", ["Comment", "comment"]]},
+
+{"description":"Start of a comment",
+"input":"<!-",
+"output":["ParseError", ["Comment", "-"]]},
+
+{"description":"Short comment",
+ "input":"<!-->",
+ "output":["ParseError", ["Comment", ""]]},
+
+{"description":"Short comment two",
+ "input":"<!--->",
+ "output":["ParseError", ["Comment", ""]]},
+
+{"description":"Short comment three",
+ "input":"<!---->",
+ "output":[["Comment", ""]]},
+
+
+{"description":"Ampersand EOF",
+"input":"&",
+"output":[["Character", "&"]]},
+
+{"description":"Ampersand ampersand EOF",
+"input":"&&",
+"output":[["Character", "&&"]]},
+
+{"description":"Ampersand space EOF",
+"input":"& ",
+"output":[["Character", "& "]]},
+
+{"description":"Unfinished entity",
+"input":"&f",
+"output":["ParseError", ["Character", "&f"]]},
+
+{"description":"Ampersand, number sign",
+"input":"&#",
+"output":["ParseError", ["Character", "&#"]]},
+
+{"description":"Unfinished numeric entity",
+"input":"&#x",
+"output":["ParseError", ["Character", "&#x"]]},
+
+{"description":"Entity with trailing semicolon (1)",
+"input":"I'm &not;it",
+"output":[["Character","I'm \u00ACit"]]},
+
+{"description":"Entity with trailing semicolon (2)",
+"input":"I'm &notin;",
+"output":[["Character","I'm \u2209"]]},
+
+{"description":"Entity without trailing semicolon (1)",
+"input":"I'm &notit",
+"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACit"]]},
+
+{"description":"Entity without trailing semicolon (2)",
+"input":"I'm &notin",
+"output":[["Character","I'm "], "ParseError", ["Character", "\u00ACin"]]},
+
+{"description":"Partial entity match at end of file",
+"input":"I'm &no",
+"output":[["Character","I'm "], "ParseError", ["Character", "&no"]]},
+
+{"description":"Non-ASCII character reference name",
+"input":"&\u00AC;",
+"output":["ParseError", ["Character", "&\u00AC;"]]},
+
+{"description":"ASCII decimal entity",
+"input":"&#0036;",
+"output":[["Character","$"]]},
+
+{"description":"ASCII hexadecimal entity",
+"input":"&#x3f;",
+"output":[["Character","?"]]},
+
+{"description":"Hexadecimal entity in attribute",
+"input":"<h a='&#x3f;'></h>",
+"output":[["StartTag", "h", {"a":"?"}], ["EndTag", "h"]]},
+
+{"description":"Entity in attribute without semicolon ending in x",
+"input":"<h a='&notx'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&notx"}]]},
+
+{"description":"Entity in attribute without semicolon ending in 1",
+"input":"<h a='&not1'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&not1"}]]},
+
+{"description":"Entity in attribute without semicolon ending in i",
+"input":"<h a='&noti'>",
+"output":["ParseError", ["StartTag", "h", {"a":"&noti"}]]},
+
+{"description":"Entity in attribute without semicolon",
+"input":"<h a='&COPY'>",
+"output":["ParseError", ["StartTag", "h", {"a":"\u00A9"}]]},
+
+{"description":"Unquoted attribute ending in ampersand",
+"input":"<s o=& t>",
+"output":[["StartTag","s",{"o":"&","t":""}]]},
+
+{"description":"Unquoted attribute at end of tag with final character of &, with tag followed by characters",
+"input":"<a a=a&>foo",
+"output":[["StartTag", "a", {"a":"a&"}], ["Character", "foo"]]},
+
+{"description":"plaintext element",
+ "input":"<plaintext>foobar",
+ "output":[["StartTag","plaintext",{}], ["Character","foobar"]]},
+
+{"description":"Open angled bracket in unquoted attribute value state",
+ "input":"<a a=f<>",
+ "output":["ParseError", ["StartTag", "a", {"a":"f<"}]]}
+
+]}
diff --git a/html5lib/tests/testdata/tokenizer/test2.test b/html5lib/tests/testdata/tokenizer/test2.test
new file mode 100644
index 00000000..e1575143
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/test2.test
@@ -0,0 +1,179 @@
+{"tests": [
+
+{"description":"DOCTYPE without name",
+"input":"<!DOCTYPE>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"DOCTYPE without space before name",
+"input":"<!DOCTYPEhtml>",
+"output":["ParseError", ["DOCTYPE", "html", null, null, true]]},
+
+{"description":"Incorrect DOCTYPE without a space before name",
+"input":"<!DOCTYPEfoo>",
+"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]},
+
+{"description":"DOCTYPE with publicId",
+"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", null, true]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC",
+"input":"<!DOCTYPE html PUBLIC",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC '",
+"input":"<!DOCTYPE html PUBLIC '",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false]]},
+
+{"description":"DOCTYPE with EOF after PUBLIC 'x",
+"input":"<!DOCTYPE html PUBLIC 'x",
+"output":["ParseError", ["DOCTYPE", "html", "x", null, false]]},
+
+{"description":"DOCTYPE with systemId",
+"input":"<!DOCTYPE html SYSTEM \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", null, "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with publicId and systemId",
+"input":"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML Transitional 4.01//EN\" \"-//W3C//DTD HTML Transitional 4.01//EN\">",
+"output":[["DOCTYPE", "html", "-//W3C//DTD HTML Transitional 4.01//EN", "-//W3C//DTD HTML Transitional 4.01//EN", true]]},
+
+{"description":"DOCTYPE with > in double-quoted publicId",
+"input":"<!DOCTYPE html PUBLIC \">x",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in single-quoted publicId",
+"input":"<!DOCTYPE html PUBLIC '>x",
+"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in double-quoted systemId",
+"input":"<!DOCTYPE html PUBLIC \"foo\" \">x",
+"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
+
+{"description":"DOCTYPE with > in single-quoted systemId",
+"input":"<!DOCTYPE html PUBLIC 'foo' '>x",
+"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]},
+
+{"description":"Incomplete doctype",
+"input":"<!DOCTYPE html ",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Numeric entity representing the NUL character",
+"input":"&#0000;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Hexadecimal entity representing the NUL character",
+"input":"&#x0000;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)",
+"input":"&#2225222;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)",
+"input":"&#x1010FFFF;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Hexadecimal entity pair representing a surrogate pair",
+"input":"&#xD869;&#xDED6;",
+"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Hexadecimal entity with mixed uppercase and lowercase",
+"input":"&#xaBcD;",
+"output":[["Character", "\uABCD"]]},
+
+{"description":"Entity without a name",
+"input":"&;",
+"output":["ParseError", ["Character", "&;"]]},
+
+{"description":"Unescaped ampersand in attribute value",
+"input":"<h a='&'>",
+"output":[["StartTag", "h", { "a":"&" }]]},
+
+{"description":"StartTag containing <",
+"input":"<a<b>",
+"output":[["StartTag", "a<b", { }]]},
+
+{"description":"Non-void element containing trailing /",
+"input":"<h/>",
+"output":[["StartTag","h",{},true]]},
+
+{"description":"Void element with permitted slash",
+"input":"<br/>",
+"output":[["StartTag","br",{},true]]},
+
+{"description":"Void element with permitted slash (with attribute)",
+"input":"<br foo='bar'/>",
+"output":[["StartTag","br",{"foo":"bar"},true]]},
+
+{"description":"StartTag containing /",
+"input":"<h/a='b'>",
+"output":["ParseError", ["StartTag", "h", { "a":"b" }]]},
+
+{"description":"Double-quoted attribute value",
+"input":"<h a=\"b\">",
+"output":[["StartTag", "h", { "a":"b" }]]},
+
+{"description":"Unescaped </",
+"input":"</",
+"output":["ParseError", ["Character", "</"]]},
+
+{"description":"Illegal end tag name",
+"input":"</1>",
+"output":["ParseError", ["Comment", "1"]]},
+
+{"description":"Simili processing instruction",
+"input":"<?namespace>",
+"output":["ParseError", ["Comment", "?namespace"]]},
+
+{"description":"A bogus comment stops at >, even if preceeded by two dashes",
+"input":"<?foo-->",
+"output":["ParseError", ["Comment", "?foo--"]]},
+
+{"description":"Unescaped <",
+"input":"foo < bar",
+"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]},
+
+{"description":"Null Byte Replacement",
+"input":"\u0000",
+"output":["ParseError", ["Character", "\u0000"]]},
+
+{"description":"Comment with dash",
+"input":"<!---x",
+"output":["ParseError", ["Comment", "-x"]]},
+
+{"description":"Entity + newline",
+"input":"\nx\n&gt;\n",
+"output":[["Character","\nx\n>\n"]]},
+
+{"description":"Start tag with no attributes but space before the greater-than sign",
+"input":"<h >",
+"output":[["StartTag", "h", {}]]},
+
+{"description":"Empty attribute followed by uppercase attribute",
+"input":"<h a B=''>",
+"output":[["StartTag", "h", {"a":"", "b":""}]]},
+
+{"description":"Double-quote after attribute name",
+"input":"<h a \">",
+"output":["ParseError", ["StartTag", "h", {"a":"", "\"":""}]]},
+
+{"description":"Single-quote after attribute name",
+"input":"<h a '>",
+"output":["ParseError", ["StartTag", "h", {"a":"", "'":""}]]},
+
+{"description":"Empty end tag with following characters",
+"input":"a</>bc",
+"output":[["Character", "a"], "ParseError", ["Character", "bc"]]},
+
+{"description":"Empty end tag with following tag",
+"input":"a</><b>c",
+"output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]]},
+
+{"description":"Empty end tag with following comment",
+"input":"a</><!--b-->c",
+"output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]},
+
+{"description":"Empty end tag with following end tag",
+"input":"a</></b>c",
+"output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]}
+
+]}
diff --git a/html5lib/tests/testdata/tokenizer/test3.test b/html5lib/tests/testdata/tokenizer/test3.test
new file mode 100644
index 00000000..58519e8a
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/test3.test
@@ -0,0 +1,6047 @@
+{"tests": [
+
+{"description":"",
+"input":"",
+"output":[]},
+
+{"description":"\\u0009",
+"input":"\u0009",
+"output":[["Character", "\u0009"]]},
+
+{"description":"\\u000A",
+"input":"\u000A",
+"output":[["Character", "\u000A"]]},
+
+{"description":"\\u000B",
+"input":"\u000B",
+"output":["ParseError", ["Character", "\u000B"]]},
+
+{"description":"\\u000C",
+"input":"\u000C",
+"output":[["Character", "\u000C"]]},
+
+{"description":" ",
+"input":" ",
+"output":[["Character", " "]]},
+
+{"description":"!",
+"input":"!",
+"output":[["Character", "!"]]},
+
+{"description":"\"",
+"input":"\"",
+"output":[["Character", "\""]]},
+
+{"description":"%",
+"input":"%",
+"output":[["Character", "%"]]},
+
+{"description":"&",
+"input":"&",
+"output":[["Character", "&"]]},
+
+{"description":"'",
+"input":"'",
+"output":[["Character", "'"]]},
+
+{"description":",",
+"input":",",
+"output":[["Character", ","]]},
+
+{"description":"-",
+"input":"-",
+"output":[["Character", "-"]]},
+
+{"description":".",
+"input":".",
+"output":[["Character", "."]]},
+
+{"description":"/",
+"input":"/",
+"output":[["Character", "/"]]},
+
+{"description":"0",
+"input":"0",
+"output":[["Character", "0"]]},
+
+{"description":"1",
+"input":"1",
+"output":[["Character", "1"]]},
+
+{"description":"9",
+"input":"9",
+"output":[["Character", "9"]]},
+
+{"description":";",
+"input":";",
+"output":[["Character", ";"]]},
+
+{"description":"<",
+"input":"<",
+"output":["ParseError", ["Character", "<"]]},
+
+{"description":"<\\u0000",
+"input":"<\u0000",
+"output":["ParseError", ["Character", "<"], "ParseError", ["Character", "\u0000"]]},
+
+{"description":"<\\u0009",
+"input":"<\u0009",
+"output":["ParseError", ["Character", "<\u0009"]]},
+
+{"description":"<\\u000A",
+"input":"<\u000A",
+"output":["ParseError", ["Character", "<\u000A"]]},
+
+{"description":"<\\u000B",
+"input":"<\u000B",
+"output":["ParseError", "ParseError", ["Character", "<\u000B"]]},
+
+{"description":"<\\u000C",
+"input":"<\u000C",
+"output":["ParseError", ["Character", "<\u000C"]]},
+
+{"description":"< ",
+"input":"< ",
+"output":["ParseError", ["Character", "< "]]},
+
+{"description":"<!",
+"input":"<!",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!\\u0000",
+"input":"<!\u0000",
+"output":["ParseError", ["Comment", "\uFFFD"]]},
+
+{"description":"<!\\u0009",
+"input":"<!\u0009",
+"output":["ParseError", ["Comment", "\u0009"]]},
+
+{"description":"<!\\u000A",
+"input":"<!\u000A",
+"output":["ParseError", ["Comment", "\u000A"]]},
+
+{"description":"<!\\u000B",
+"input":"<!\u000B",
+"output":["ParseError", "ParseError", ["Comment", "\u000B"]]},
+
+{"description":"<!\\u000C",
+"input":"<!\u000C",
+"output":["ParseError", ["Comment", "\u000C"]]},
+
+{"description":"<! ",
+"input":"<! ",
+"output":["ParseError", ["Comment", " "]]},
+
+{"description":"<!!",
+"input":"<!!",
+"output":["ParseError", ["Comment", "!"]]},
+
+{"description":"<!\"",
+"input":"<!\"",
+"output":["ParseError", ["Comment", "\""]]},
+
+{"description":"<!&",
+"input":"<!&",
+"output":["ParseError", ["Comment", "&"]]},
+
+{"description":"<!'",
+"input":"<!'",
+"output":["ParseError", ["Comment", "'"]]},
+
+{"description":"<!-",
+"input":"<!-",
+"output":["ParseError", ["Comment", "-"]]},
+
+{"description":"<!--",
+"input":"<!--",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!--\\u0000",
+"input":"<!--\u0000",
+"output":["ParseError", "ParseError", ["Comment", "\uFFFD"]]},
+
+{"description":"<!--\\u0009",
+"input":"<!--\u0009",
+"output":["ParseError", ["Comment", "\u0009"]]},
+
+{"description":"<!--\\u000A",
+"input":"<!--\u000A",
+"output":["ParseError", ["Comment", "\u000A"]]},
+
+{"description":"<!--\\u000B",
+"input":"<!--\u000B",
+"output":["ParseError", "ParseError", ["Comment", "\u000B"]]},
+
+{"description":"<!--\\u000C",
+"input":"<!--\u000C",
+"output":["ParseError", ["Comment", "\u000C"]]},
+
+{"description":"<!-- ",
+"input":"<!-- ",
+"output":["ParseError", ["Comment", " "]]},
+
+{"description":"<!-- \\u0000",
+"input":"<!-- \u0000",
+"output":["ParseError", "ParseError", ["Comment", " \uFFFD"]]},
+
+{"description":"<!-- \\u0009",
+"input":"<!-- \u0009",
+"output":["ParseError", ["Comment", " \u0009"]]},
+
+{"description":"<!-- \\u000A",
+"input":"<!-- \u000A",
+"output":["ParseError", ["Comment", " \u000A"]]},
+
+{"description":"<!-- \\u000B",
+"input":"<!-- \u000B",
+"output":["ParseError", "ParseError", ["Comment", " \u000B"]]},
+
+{"description":"<!-- \\u000C",
+"input":"<!-- \u000C",
+"output":["ParseError", ["Comment", " \u000C"]]},
+
+{"description":"<!--  ",
+"input":"<!--  ",
+"output":["ParseError", ["Comment", "  "]]},
+
+{"description":"<!-- !",
+"input":"<!-- !",
+"output":["ParseError", ["Comment", " !"]]},
+
+{"description":"<!-- \"",
+"input":"<!-- \"",
+"output":["ParseError", ["Comment", " \""]]},
+
+{"description":"<!-- &",
+"input":"<!-- &",
+"output":["ParseError", ["Comment", " &"]]},
+
+{"description":"<!-- '",
+"input":"<!-- '",
+"output":["ParseError", ["Comment", " '"]]},
+
+{"description":"<!-- ,",
+"input":"<!-- ,",
+"output":["ParseError", ["Comment", " ,"]]},
+
+{"description":"<!-- -",
+"input":"<!-- -",
+"output":["ParseError", ["Comment", " "]]},
+
+{"description":"<!-- -\\u0000",
+"input":"<!-- -\u0000",
+"output":["ParseError", "ParseError", ["Comment", " -\uFFFD"]]},
+
+{"description":"<!-- -\\u0009",
+"input":"<!-- -\u0009",
+"output":["ParseError", ["Comment", " -\u0009"]]},
+
+{"description":"<!-- -\\u000A",
+"input":"<!-- -\u000A",
+"output":["ParseError", ["Comment", " -\u000A"]]},
+
+{"description":"<!-- -\\u000B",
+"input":"<!-- -\u000B",
+"output":["ParseError", "ParseError", ["Comment", " -\u000B"]]},
+
+{"description":"<!-- -\\u000C",
+"input":"<!-- -\u000C",
+"output":["ParseError", ["Comment", " -\u000C"]]},
+
+{"description":"<!-- - ",
+"input":"<!-- - ",
+"output":["ParseError", ["Comment", " - "]]},
+
+{"description":"<!-- -!",
+"input":"<!-- -!",
+"output":["ParseError", ["Comment", " -!"]]},
+
+{"description":"<!-- -\"",
+"input":"<!-- -\"",
+"output":["ParseError", ["Comment", " -\""]]},
+
+{"description":"<!-- -&",
+"input":"<!-- -&",
+"output":["ParseError", ["Comment", " -&"]]},
+
+{"description":"<!-- -'",
+"input":"<!-- -'",
+"output":["ParseError", ["Comment", " -'"]]},
+
+{"description":"<!-- -,",
+"input":"<!-- -,",
+"output":["ParseError", ["Comment", " -,"]]},
+
+{"description":"<!-- --",
+"input":"<!-- --",
+"output":["ParseError", ["Comment", " "]]},
+
+{"description":"<!-- -.",
+"input":"<!-- -.",
+"output":["ParseError", ["Comment", " -."]]},
+
+{"description":"<!-- -/",
+"input":"<!-- -/",
+"output":["ParseError", ["Comment", " -/"]]},
+
+{"description":"<!-- -0",
+"input":"<!-- -0",
+"output":["ParseError", ["Comment", " -0"]]},
+
+{"description":"<!-- -1",
+"input":"<!-- -1",
+"output":["ParseError", ["Comment", " -1"]]},
+
+{"description":"<!-- -9",
+"input":"<!-- -9",
+"output":["ParseError", ["Comment", " -9"]]},
+
+{"description":"<!-- -<",
+"input":"<!-- -<",
+"output":["ParseError", ["Comment", " -<"]]},
+
+{"description":"<!-- -=",
+"input":"<!-- -=",
+"output":["ParseError", ["Comment", " -="]]},
+
+{"description":"<!-- ->",
+"input":"<!-- ->",
+"output":["ParseError", ["Comment", " ->"]]},
+
+{"description":"<!-- -?",
+"input":"<!-- -?",
+"output":["ParseError", ["Comment", " -?"]]},
+
+{"description":"<!-- -@",
+"input":"<!-- -@",
+"output":["ParseError", ["Comment", " -@"]]},
+
+{"description":"<!-- -A",
+"input":"<!-- -A",
+"output":["ParseError", ["Comment", " -A"]]},
+
+{"description":"<!-- -B",
+"input":"<!-- -B",
+"output":["ParseError", ["Comment", " -B"]]},
+
+{"description":"<!-- -Y",
+"input":"<!-- -Y",
+"output":["ParseError", ["Comment", " -Y"]]},
+
+{"description":"<!-- -Z",
+"input":"<!-- -Z",
+"output":["ParseError", ["Comment", " -Z"]]},
+
+{"description":"<!-- -`",
+"input":"<!-- -`",
+"output":["ParseError", ["Comment", " -`"]]},
+
+{"description":"<!-- -a",
+"input":"<!-- -a",
+"output":["ParseError", ["Comment", " -a"]]},
+
+{"description":"<!-- -b",
+"input":"<!-- -b",
+"output":["ParseError", ["Comment", " -b"]]},
+
+{"description":"<!-- -y",
+"input":"<!-- -y",
+"output":["ParseError", ["Comment", " -y"]]},
+
+{"description":"<!-- -z",
+"input":"<!-- -z",
+"output":["ParseError", ["Comment", " -z"]]},
+
+{"description":"<!-- -{",
+"input":"<!-- -{",
+"output":["ParseError", ["Comment", " -{"]]},
+
+{"description":"<!-- -\\uDBC0\\uDC00",
+"input":"<!-- -\uDBC0\uDC00",
+"output":["ParseError", ["Comment", " -\uDBC0\uDC00"]]},
+
+{"description":"<!-- .",
+"input":"<!-- .",
+"output":["ParseError", ["Comment", " ."]]},
+
+{"description":"<!-- /",
+"input":"<!-- /",
+"output":["ParseError", ["Comment", " /"]]},
+
+{"description":"<!-- 0",
+"input":"<!-- 0",
+"output":["ParseError", ["Comment", " 0"]]},
+
+{"description":"<!-- 1",
+"input":"<!-- 1",
+"output":["ParseError", ["Comment", " 1"]]},
+
+{"description":"<!-- 9",
+"input":"<!-- 9",
+"output":["ParseError", ["Comment", " 9"]]},
+
+{"description":"<!-- <",
+"input":"<!-- <",
+"output":["ParseError", ["Comment", " <"]]},
+
+{"description":"<!-- =",
+"input":"<!-- =",
+"output":["ParseError", ["Comment", " ="]]},
+
+{"description":"<!-- >",
+"input":"<!-- >",
+"output":["ParseError", ["Comment", " >"]]},
+
+{"description":"<!-- ?",
+"input":"<!-- ?",
+"output":["ParseError", ["Comment", " ?"]]},
+
+{"description":"<!-- @",
+"input":"<!-- @",
+"output":["ParseError", ["Comment", " @"]]},
+
+{"description":"<!-- A",
+"input":"<!-- A",
+"output":["ParseError", ["Comment", " A"]]},
+
+{"description":"<!-- B",
+"input":"<!-- B",
+"output":["ParseError", ["Comment", " B"]]},
+
+{"description":"<!-- Y",
+"input":"<!-- Y",
+"output":["ParseError", ["Comment", " Y"]]},
+
+{"description":"<!-- Z",
+"input":"<!-- Z",
+"output":["ParseError", ["Comment", " Z"]]},
+
+{"description":"<!-- `",
+"input":"<!-- `",
+"output":["ParseError", ["Comment", " `"]]},
+
+{"description":"<!-- a",
+"input":"<!-- a",
+"output":["ParseError", ["Comment", " a"]]},
+
+{"description":"<!-- b",
+"input":"<!-- b",
+"output":["ParseError", ["Comment", " b"]]},
+
+{"description":"<!-- y",
+"input":"<!-- y",
+"output":["ParseError", ["Comment", " y"]]},
+
+{"description":"<!-- z",
+"input":"<!-- z",
+"output":["ParseError", ["Comment", " z"]]},
+
+{"description":"<!-- {",
+"input":"<!-- {",
+"output":["ParseError", ["Comment", " {"]]},
+
+{"description":"<!-- \\uDBC0\\uDC00",
+"input":"<!-- \uDBC0\uDC00",
+"output":["ParseError", ["Comment", " \uDBC0\uDC00"]]},
+
+{"description":"<!--!",
+"input":"<!--!",
+"output":["ParseError", ["Comment", "!"]]},
+
+{"description":"<!--\"",
+"input":"<!--\"",
+"output":["ParseError", ["Comment", "\""]]},
+
+{"description":"<!--&",
+"input":"<!--&",
+"output":["ParseError", ["Comment", "&"]]},
+
+{"description":"<!--'",
+"input":"<!--'",
+"output":["ParseError", ["Comment", "'"]]},
+
+{"description":"<!--,",
+"input":"<!--,",
+"output":["ParseError", ["Comment", ","]]},
+
+{"description":"<!---",
+"input":"<!---",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!---\\u0000",
+"input":"<!---\u0000",
+"output":["ParseError", "ParseError", ["Comment", "-\uFFFD"]]},
+
+{"description":"<!---\\u0009",
+"input":"<!---\u0009",
+"output":["ParseError", ["Comment", "-\u0009"]]},
+
+{"description":"<!---\\u000A",
+"input":"<!---\u000A",
+"output":["ParseError", ["Comment", "-\u000A"]]},
+
+{"description":"<!---\\u000B",
+"input":"<!---\u000B",
+"output":["ParseError", "ParseError", ["Comment", "-\u000B"]]},
+
+{"description":"<!---\\u000C",
+"input":"<!---\u000C",
+"output":["ParseError", ["Comment", "-\u000C"]]},
+
+{"description":"<!--- ",
+"input":"<!--- ",
+"output":["ParseError", ["Comment", "- "]]},
+
+{"description":"<!---!",
+"input":"<!---!",
+"output":["ParseError", ["Comment", "-!"]]},
+
+{"description":"<!---\"",
+"input":"<!---\"",
+"output":["ParseError", ["Comment", "-\""]]},
+
+{"description":"<!---&",
+"input":"<!---&",
+"output":["ParseError", ["Comment", "-&"]]},
+
+{"description":"<!---'",
+"input":"<!---'",
+"output":["ParseError", ["Comment", "-'"]]},
+
+{"description":"<!---,",
+"input":"<!---,",
+"output":["ParseError", ["Comment", "-,"]]},
+
+{"description":"<!----",
+"input":"<!----",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!----\\u0000",
+"input":"<!----\u0000",
+"output":["ParseError", "ParseError", ["Comment", "--\uFFFD"]]},
+
+{"description":"<!----\\u0009",
+"input":"<!----\u0009",
+"output":["ParseError", "ParseError", ["Comment", "--\u0009"]]},
+
+{"description":"<!----\\u000A",
+"input":"<!----\u000A",
+"output":["ParseError", "ParseError", ["Comment", "--\u000A"]]},
+
+{"description":"<!----\\u000B",
+"input":"<!----\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["Comment", "--\u000B"]]},
+
+{"description":"<!----\\u000C",
+"input":"<!----\u000C",
+"output":["ParseError", "ParseError", ["Comment", "--\u000C"]]},
+
+{"description":"<!---- ",
+"input":"<!---- ",
+"output":["ParseError", "ParseError", ["Comment", "-- "]]},
+
+{"description":"<!---- -",
+"input":"<!---- -",
+"output":["ParseError", "ParseError", ["Comment", "-- "]]},
+
+{"description":"<!---- --",
+"input":"<!---- --",
+"output":["ParseError", "ParseError", ["Comment", "-- "]]},
+
+{"description":"<!---- -->",
+"input":"<!---- -->",
+"output":["ParseError", ["Comment", "-- "]]},
+
+{"description":"<!----  -->",
+"input":"<!----  -->",
+"output":["ParseError", ["Comment", "--  "]]},
+
+{"description":"<!---- a-->",
+"input":"<!---- a-->",
+"output":["ParseError", ["Comment", "-- a"]]},
+
+{"description":"<!----!",
+"input":"<!----!",
+"output":["ParseError", "ParseError", ["Comment", ""]]},
+
+{"description":"<!----!>",
+"input":"<!----!>",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!----!a",
+"input":"<!----!a",
+"output":["ParseError", "ParseError", ["Comment", "--!a"]]},
+
+{"description":"<!----!a-",
+"input":"<!----!a-",
+"output":["ParseError", "ParseError", ["Comment", "--!a"]]},
+
+{"description":"<!----!a--",
+"input":"<!----!a--",
+"output":["ParseError", "ParseError", ["Comment", "--!a"]]},
+
+{"description":"<!----!a-->",
+"input":"<!----!a-->",
+"output":["ParseError", ["Comment", "--!a"]]},
+
+{"description":"<!----!-",
+"input":"<!----!-",
+"output":["ParseError", "ParseError", ["Comment", "--!"]]},
+
+{"description":"<!----!--",
+"input":"<!----!--",
+"output":["ParseError", "ParseError", ["Comment", "--!"]]},
+
+{"description":"<!----!-->",
+"input":"<!----!-->",
+"output":["ParseError", ["Comment", "--!"]]},
+
+{"description":"<!----\"",
+"input":"<!----\"",
+"output":["ParseError", "ParseError", ["Comment", "--\""]]},
+
+{"description":"<!----&",
+"input":"<!----&",
+"output":["ParseError", "ParseError", ["Comment", "--&"]]},
+
+{"description":"<!----'",
+"input":"<!----'",
+"output":["ParseError", "ParseError", ["Comment", "--'"]]},
+
+{"description":"<!----,",
+"input":"<!----,",
+"output":["ParseError", "ParseError", ["Comment", "--,"]]},
+
+{"description":"<!-----",
+"input":"<!-----",
+"output":["ParseError", "ParseError", ["Comment", "-"]]},
+
+{"description":"<!----.",
+"input":"<!----.",
+"output":["ParseError", "ParseError", ["Comment", "--."]]},
+
+{"description":"<!----/",
+"input":"<!----/",
+"output":["ParseError", "ParseError", ["Comment", "--/"]]},
+
+{"description":"<!----0",
+"input":"<!----0",
+"output":["ParseError", "ParseError", ["Comment", "--0"]]},
+
+{"description":"<!----1",
+"input":"<!----1",
+"output":["ParseError", "ParseError", ["Comment", "--1"]]},
+
+{"description":"<!----9",
+"input":"<!----9",
+"output":["ParseError", "ParseError", ["Comment", "--9"]]},
+
+{"description":"<!----<",
+"input":"<!----<",
+"output":["ParseError", "ParseError", ["Comment", "--<"]]},
+
+{"description":"<!----=",
+"input":"<!----=",
+"output":["ParseError", "ParseError", ["Comment", "--="]]},
+
+{"description":"<!---->",
+"input":"<!---->",
+"output":[["Comment", ""]]},
+
+{"description":"<!----?",
+"input":"<!----?",
+"output":["ParseError", "ParseError", ["Comment", "--?"]]},
+
+{"description":"<!----@",
+"input":"<!----@",
+"output":["ParseError", "ParseError", ["Comment", "--@"]]},
+
+{"description":"<!----A",
+"input":"<!----A",
+"output":["ParseError", "ParseError", ["Comment", "--A"]]},
+
+{"description":"<!----B",
+"input":"<!----B",
+"output":["ParseError", "ParseError", ["Comment", "--B"]]},
+
+{"description":"<!----Y",
+"input":"<!----Y",
+"output":["ParseError", "ParseError", ["Comment", "--Y"]]},
+
+{"description":"<!----Z",
+"input":"<!----Z",
+"output":["ParseError", "ParseError", ["Comment", "--Z"]]},
+
+{"description":"<!----`",
+"input":"<!----`",
+"output":["ParseError", "ParseError", ["Comment", "--`"]]},
+
+{"description":"<!----a",
+"input":"<!----a",
+"output":["ParseError", "ParseError", ["Comment", "--a"]]},
+
+{"description":"<!----b",
+"input":"<!----b",
+"output":["ParseError", "ParseError", ["Comment", "--b"]]},
+
+{"description":"<!----y",
+"input":"<!----y",
+"output":["ParseError", "ParseError", ["Comment", "--y"]]},
+
+{"description":"<!----z",
+"input":"<!----z",
+"output":["ParseError", "ParseError", ["Comment", "--z"]]},
+
+{"description":"<!----{",
+"input":"<!----{",
+"output":["ParseError", "ParseError", ["Comment", "--{"]]},
+
+{"description":"<!----\\uDBC0\\uDC00",
+"input":"<!----\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["Comment", "--\uDBC0\uDC00"]]},
+
+{"description":"<!---.",
+"input":"<!---.",
+"output":["ParseError", ["Comment", "-."]]},
+
+{"description":"<!---/",
+"input":"<!---/",
+"output":["ParseError", ["Comment", "-/"]]},
+
+{"description":"<!---0",
+"input":"<!---0",
+"output":["ParseError", ["Comment", "-0"]]},
+
+{"description":"<!---1",
+"input":"<!---1",
+"output":["ParseError", ["Comment", "-1"]]},
+
+{"description":"<!---9",
+"input":"<!---9",
+"output":["ParseError", ["Comment", "-9"]]},
+
+{"description":"<!---<",
+"input":"<!---<",
+"output":["ParseError", ["Comment", "-<"]]},
+
+{"description":"<!---=",
+"input":"<!---=",
+"output":["ParseError", ["Comment", "-="]]},
+
+{"description":"<!--->",
+"input":"<!--->",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!---?",
+"input":"<!---?",
+"output":["ParseError", ["Comment", "-?"]]},
+
+{"description":"<!---@",
+"input":"<!---@",
+"output":["ParseError", ["Comment", "-@"]]},
+
+{"description":"<!---A",
+"input":"<!---A",
+"output":["ParseError", ["Comment", "-A"]]},
+
+{"description":"<!---B",
+"input":"<!---B",
+"output":["ParseError", ["Comment", "-B"]]},
+
+{"description":"<!---Y",
+"input":"<!---Y",
+"output":["ParseError", ["Comment", "-Y"]]},
+
+{"description":"<!---Z",
+"input":"<!---Z",
+"output":["ParseError", ["Comment", "-Z"]]},
+
+{"description":"<!---`",
+"input":"<!---`",
+"output":["ParseError", ["Comment", "-`"]]},
+
+{"description":"<!---a",
+"input":"<!---a",
+"output":["ParseError", ["Comment", "-a"]]},
+
+{"description":"<!---b",
+"input":"<!---b",
+"output":["ParseError", ["Comment", "-b"]]},
+
+{"description":"<!---y",
+"input":"<!---y",
+"output":["ParseError", ["Comment", "-y"]]},
+
+{"description":"<!---z",
+"input":"<!---z",
+"output":["ParseError", ["Comment", "-z"]]},
+
+{"description":"<!---{",
+"input":"<!---{",
+"output":["ParseError", ["Comment", "-{"]]},
+
+{"description":"<!---\\uDBC0\\uDC00",
+"input":"<!---\uDBC0\uDC00",
+"output":["ParseError", ["Comment", "-\uDBC0\uDC00"]]},
+
+{"description":"<!--.",
+"input":"<!--.",
+"output":["ParseError", ["Comment", "."]]},
+
+{"description":"<!--/",
+"input":"<!--/",
+"output":["ParseError", ["Comment", "/"]]},
+
+{"description":"<!--0",
+"input":"<!--0",
+"output":["ParseError", ["Comment", "0"]]},
+
+{"description":"<!--1",
+"input":"<!--1",
+"output":["ParseError", ["Comment", "1"]]},
+
+{"description":"<!--9",
+"input":"<!--9",
+"output":["ParseError", ["Comment", "9"]]},
+
+{"description":"<!--<",
+"input":"<!--<",
+"output":["ParseError", ["Comment", "<"]]},
+
+{"description":"<!--=",
+"input":"<!--=",
+"output":["ParseError", ["Comment", "="]]},
+
+{"description":"<!-->",
+"input":"<!-->",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!--?",
+"input":"<!--?",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"<!--@",
+"input":"<!--@",
+"output":["ParseError", ["Comment", "@"]]},
+
+{"description":"<!--A",
+"input":"<!--A",
+"output":["ParseError", ["Comment", "A"]]},
+
+{"description":"<!--B",
+"input":"<!--B",
+"output":["ParseError", ["Comment", "B"]]},
+
+{"description":"<!--Y",
+"input":"<!--Y",
+"output":["ParseError", ["Comment", "Y"]]},
+
+{"description":"<!--Z",
+"input":"<!--Z",
+"output":["ParseError", ["Comment", "Z"]]},
+
+{"description":"<!--`",
+"input":"<!--`",
+"output":["ParseError", ["Comment", "`"]]},
+
+{"description":"<!--a",
+"input":"<!--a",
+"output":["ParseError", ["Comment", "a"]]},
+
+{"description":"<!--b",
+"input":"<!--b",
+"output":["ParseError", ["Comment", "b"]]},
+
+{"description":"<!--y",
+"input":"<!--y",
+"output":["ParseError", ["Comment", "y"]]},
+
+{"description":"<!--z",
+"input":"<!--z",
+"output":["ParseError", ["Comment", "z"]]},
+
+{"description":"<!--{",
+"input":"<!--{",
+"output":["ParseError", ["Comment", "{"]]},
+
+{"description":"<!--\\uDBC0\\uDC00",
+"input":"<!--\uDBC0\uDC00",
+"output":["ParseError", ["Comment", "\uDBC0\uDC00"]]},
+
+{"description":"<!/",
+"input":"<!/",
+"output":["ParseError", ["Comment", "/"]]},
+
+{"description":"<!0",
+"input":"<!0",
+"output":["ParseError", ["Comment", "0"]]},
+
+{"description":"<!1",
+"input":"<!1",
+"output":["ParseError", ["Comment", "1"]]},
+
+{"description":"<!9",
+"input":"<!9",
+"output":["ParseError", ["Comment", "9"]]},
+
+{"description":"<!<",
+"input":"<!<",
+"output":["ParseError", ["Comment", "<"]]},
+
+{"description":"<!=",
+"input":"<!=",
+"output":["ParseError", ["Comment", "="]]},
+
+{"description":"<!>",
+"input":"<!>",
+"output":["ParseError", ["Comment", ""]]},
+
+{"description":"<!?",
+"input":"<!?",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"<!@",
+"input":"<!@",
+"output":["ParseError", ["Comment", "@"]]},
+
+{"description":"<!A",
+"input":"<!A",
+"output":["ParseError", ["Comment", "A"]]},
+
+{"description":"<!B",
+"input":"<!B",
+"output":["ParseError", ["Comment", "B"]]},
+
+{"description":"<!DOCTYPE",
+"input":"<!DOCTYPE",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE\\u0000",
+"input":"<!DOCTYPE\u0000",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "\uFFFD", null, null, false]]},
+
+{"description":"<!DOCTYPE\\u0008",
+"input":"<!DOCTYPE\u0008",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "\u0008", null, null, false]]},
+
+{"description":"<!DOCTYPE\\u0009",
+"input":"<!DOCTYPE\u0009",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE\\u000A",
+"input":"<!DOCTYPE\u000A",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE\\u000B",
+"input":"<!DOCTYPE\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "\u000B", null, null, false]]},
+
+{"description":"<!DOCTYPE\\u000C",
+"input":"<!DOCTYPE\u000C",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE\\u000D",
+"input":"<!DOCTYPE\u000D",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE\\u001F",
+"input":"<!DOCTYPE\u001F",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "\u001F", null, null, false]]},
+
+{"description":"<!DOCTYPE ",
+"input":"<!DOCTYPE ",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE \\u0000",
+"input":"<!DOCTYPE \u0000",
+"output":["ParseError", "ParseError", ["DOCTYPE", "\uFFFD", null, null, false]]},
+
+{"description":"<!DOCTYPE \\u0008",
+"input":"<!DOCTYPE \u0008",
+"output":["ParseError", "ParseError", ["DOCTYPE", "\u0008", null, null, false]]},
+
+{"description":"<!DOCTYPE \\u0009",
+"input":"<!DOCTYPE \u0009",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE \\u000A",
+"input":"<!DOCTYPE \u000A",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE \\u000B",
+"input":"<!DOCTYPE \u000B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "\u000B", null, null, false]]},
+
+{"description":"<!DOCTYPE \\u000C",
+"input":"<!DOCTYPE \u000C",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE \\u000D",
+"input":"<!DOCTYPE \u000D",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE \\u001F",
+"input":"<!DOCTYPE \u001F",
+"output":["ParseError", "ParseError", ["DOCTYPE", "\u001F", null, null, false]]},
+
+{"description":"<!DOCTYPE  ",
+"input":"<!DOCTYPE  ",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE !",
+"input":"<!DOCTYPE !",
+"output":["ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!DOCTYPE \"",
+"input":"<!DOCTYPE \"",
+"output":["ParseError", ["DOCTYPE", "\"", null, null, false]]},
+
+{"description":"<!DOCTYPE &",
+"input":"<!DOCTYPE &",
+"output":["ParseError", ["DOCTYPE", "&", null, null, false]]},
+
+{"description":"<!DOCTYPE '",
+"input":"<!DOCTYPE '",
+"output":["ParseError", ["DOCTYPE", "'", null, null, false]]},
+
+{"description":"<!DOCTYPE -",
+"input":"<!DOCTYPE -",
+"output":["ParseError", ["DOCTYPE", "-", null, null, false]]},
+
+{"description":"<!DOCTYPE /",
+"input":"<!DOCTYPE /",
+"output":["ParseError", ["DOCTYPE", "/", null, null, false]]},
+
+{"description":"<!DOCTYPE 0",
+"input":"<!DOCTYPE 0",
+"output":["ParseError", ["DOCTYPE", "0", null, null, false]]},
+
+{"description":"<!DOCTYPE 1",
+"input":"<!DOCTYPE 1",
+"output":["ParseError", ["DOCTYPE", "1", null, null, false]]},
+
+{"description":"<!DOCTYPE 9",
+"input":"<!DOCTYPE 9",
+"output":["ParseError", ["DOCTYPE", "9", null, null, false]]},
+
+{"description":"<!DOCTYPE <",
+"input":"<!DOCTYPE <",
+"output":["ParseError", ["DOCTYPE", "<", null, null, false]]},
+
+{"description":"<!DOCTYPE =",
+"input":"<!DOCTYPE =",
+"output":["ParseError", ["DOCTYPE", "=", null, null, false]]},
+
+{"description":"<!DOCTYPE >",
+"input":"<!DOCTYPE >",
+"output":["ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE ?",
+"input":"<!DOCTYPE ?",
+"output":["ParseError", ["DOCTYPE", "?", null, null, false]]},
+
+{"description":"<!DOCTYPE @",
+"input":"<!DOCTYPE @",
+"output":["ParseError", ["DOCTYPE", "@", null, null, false]]},
+
+{"description":"<!DOCTYPE A",
+"input":"<!DOCTYPE A",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE B",
+"input":"<!DOCTYPE B",
+"output":["ParseError", ["DOCTYPE", "b", null, null, false]]},
+
+{"description":"<!DOCTYPE Y",
+"input":"<!DOCTYPE Y",
+"output":["ParseError", ["DOCTYPE", "y", null, null, false]]},
+
+{"description":"<!DOCTYPE Z",
+"input":"<!DOCTYPE Z",
+"output":["ParseError", ["DOCTYPE", "z", null, null, false]]},
+
+{"description":"<!DOCTYPE [",
+"input":"<!DOCTYPE [",
+"output":["ParseError", ["DOCTYPE", "[", null, null, false]]},
+
+{"description":"<!DOCTYPE `",
+"input":"<!DOCTYPE `",
+"output":["ParseError", ["DOCTYPE", "`", null, null, false]]},
+
+{"description":"<!DOCTYPE a",
+"input":"<!DOCTYPE a",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a\\u0000",
+"input":"<!DOCTYPE a\u0000",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a\uFFFD", null, null, false]]},
+
+{"description":"<!DOCTYPE a\\u0008",
+"input":"<!DOCTYPE a\u0008",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a\u0008", null, null, false]]},
+
+{"description":"<!DOCTYPE a\\u0009",
+"input":"<!DOCTYPE a\u0009",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a\\u000A",
+"input":"<!DOCTYPE a\u000A",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a\\u000B",
+"input":"<!DOCTYPE a\u000B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a\u000B", null, null, false]]},
+
+{"description":"<!DOCTYPE a\\u000C",
+"input":"<!DOCTYPE a\u000C",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a\\u000D",
+"input":"<!DOCTYPE a\u000D",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a\\u001F",
+"input":"<!DOCTYPE a\u001F",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a\u001F", null, null, false]]},
+
+{"description":"<!DOCTYPE a ",
+"input":"<!DOCTYPE a ",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a \\u0000",
+"input":"<!DOCTYPE a \u0000",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a \\u0008",
+"input":"<!DOCTYPE a \u0008",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a \\u0009",
+"input":"<!DOCTYPE a \u0009",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a \\u000A",
+"input":"<!DOCTYPE a \u000A",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a \\u000B",
+"input":"<!DOCTYPE a \u000B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a \\u000C",
+"input":"<!DOCTYPE a \u000C",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a \\u000D",
+"input":"<!DOCTYPE a \u000D",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a \\u001F",
+"input":"<!DOCTYPE a \u001F",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a  ",
+"input":"<!DOCTYPE a  ",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a !",
+"input":"<!DOCTYPE a !",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a \"",
+"input":"<!DOCTYPE a \"",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a &",
+"input":"<!DOCTYPE a &",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a '",
+"input":"<!DOCTYPE a '",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a -",
+"input":"<!DOCTYPE a -",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a /",
+"input":"<!DOCTYPE a /",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a 0",
+"input":"<!DOCTYPE a 0",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a 1",
+"input":"<!DOCTYPE a 1",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a 9",
+"input":"<!DOCTYPE a 9",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a <",
+"input":"<!DOCTYPE a <",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a =",
+"input":"<!DOCTYPE a =",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a >",
+"input":"<!DOCTYPE a >",
+"output":[["DOCTYPE", "a", null, null, true]]},
+
+{"description":"<!DOCTYPE a ?",
+"input":"<!DOCTYPE a ?",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a @",
+"input":"<!DOCTYPE a @",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a A",
+"input":"<!DOCTYPE a A",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a B",
+"input":"<!DOCTYPE a B",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC",
+"input":"<!DOCTYPE a PUBLIC",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u0000",
+"input":"<!DOCTYPE a PUBLIC\u0000",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u0008",
+"input":"<!DOCTYPE a PUBLIC\u0008",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u0009",
+"input":"<!DOCTYPE a PUBLIC\u0009",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u000A",
+"input":"<!DOCTYPE a PUBLIC\u000A",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u000B",
+"input":"<!DOCTYPE a PUBLIC\u000B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u000C",
+"input":"<!DOCTYPE a PUBLIC\u000C",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u000D",
+"input":"<!DOCTYPE a PUBLIC\u000D",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\\u001F",
+"input":"<!DOCTYPE a PUBLIC\u001F",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC ",
+"input":"<!DOCTYPE a PUBLIC ",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC!",
+"input":"<!DOCTYPE a PUBLIC!",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"",
+"input":"<!DOCTYPE a PUBLIC\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\u0000",
+"input":"<!DOCTYPE a PUBLIC\"\u0000",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\u0009",
+"input":"<!DOCTYPE a PUBLIC\"\u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\u000A",
+"input":"<!DOCTYPE a PUBLIC\"\u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\u000B",
+"input":"<!DOCTYPE a PUBLIC\"\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\u000C",
+"input":"<!DOCTYPE a PUBLIC\"\u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\" ",
+"input":"<!DOCTYPE a PUBLIC\" ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", " ", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"!",
+"input":"<!DOCTYPE a PUBLIC\"!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "!", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\"",
+"input":"<!DOCTYPE a PUBLIC\"\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"#",
+"input":"<!DOCTYPE a PUBLIC\"#",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "#", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"&",
+"input":"<!DOCTYPE a PUBLIC\"&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "&", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"'",
+"input":"<!DOCTYPE a PUBLIC\"'",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "'", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"-",
+"input":"<!DOCTYPE a PUBLIC\"-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "-", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"/",
+"input":"<!DOCTYPE a PUBLIC\"/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "/", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"0",
+"input":"<!DOCTYPE a PUBLIC\"0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "0", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"1",
+"input":"<!DOCTYPE a PUBLIC\"1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "1", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"9",
+"input":"<!DOCTYPE a PUBLIC\"9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "9", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"<",
+"input":"<!DOCTYPE a PUBLIC\"<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "<", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"=",
+"input":"<!DOCTYPE a PUBLIC\"=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "=", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\">",
+"input":"<!DOCTYPE a PUBLIC\">",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"?",
+"input":"<!DOCTYPE a PUBLIC\"?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "?", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"@",
+"input":"<!DOCTYPE a PUBLIC\"@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "@", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"A",
+"input":"<!DOCTYPE a PUBLIC\"A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "A", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"B",
+"input":"<!DOCTYPE a PUBLIC\"B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "B", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"Y",
+"input":"<!DOCTYPE a PUBLIC\"Y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Y", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"Z",
+"input":"<!DOCTYPE a PUBLIC\"Z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Z", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"`",
+"input":"<!DOCTYPE a PUBLIC\"`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "`", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"a",
+"input":"<!DOCTYPE a PUBLIC\"a",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "a", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"b",
+"input":"<!DOCTYPE a PUBLIC\"b",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "b", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"y",
+"input":"<!DOCTYPE a PUBLIC\"y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "y", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"z",
+"input":"<!DOCTYPE a PUBLIC\"z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "z", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"{",
+"input":"<!DOCTYPE a PUBLIC\"{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "{", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\"\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a PUBLIC\"\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC#",
+"input":"<!DOCTYPE a PUBLIC#",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC&",
+"input":"<!DOCTYPE a PUBLIC&",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'",
+"input":"<!DOCTYPE a PUBLIC'",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\u0000",
+"input":"<!DOCTYPE a PUBLIC'\u0000",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\u0009",
+"input":"<!DOCTYPE a PUBLIC'\u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\u000A",
+"input":"<!DOCTYPE a PUBLIC'\u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\u000B",
+"input":"<!DOCTYPE a PUBLIC'\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\u000C",
+"input":"<!DOCTYPE a PUBLIC'\u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC' ",
+"input":"<!DOCTYPE a PUBLIC' ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", " ", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'!",
+"input":"<!DOCTYPE a PUBLIC'!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "!", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'\"",
+"input":"<!DOCTYPE a PUBLIC'\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\"", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'&",
+"input":"<!DOCTYPE a PUBLIC'&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "&", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''",
+"input":"<!DOCTYPE a PUBLIC''",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u0000",
+"input":"<!DOCTYPE a PUBLIC''\u0000",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u0008",
+"input":"<!DOCTYPE a PUBLIC''\u0008",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u0009",
+"input":"<!DOCTYPE a PUBLIC''\u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u000A",
+"input":"<!DOCTYPE a PUBLIC''\u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u000B",
+"input":"<!DOCTYPE a PUBLIC''\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u000C",
+"input":"<!DOCTYPE a PUBLIC''\u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u000D",
+"input":"<!DOCTYPE a PUBLIC''\u000D",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\u001F",
+"input":"<!DOCTYPE a PUBLIC''\u001F",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'' ",
+"input":"<!DOCTYPE a PUBLIC'' ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''!",
+"input":"<!DOCTYPE a PUBLIC''!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''\"",
+"input":"<!DOCTYPE a PUBLIC''\"",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", "", false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''#",
+"input":"<!DOCTYPE a PUBLIC''#",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''&",
+"input":"<!DOCTYPE a PUBLIC''&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'''",
+"input":"<!DOCTYPE a PUBLIC'''",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", "", false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''(",
+"input":"<!DOCTYPE a PUBLIC''(",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''-",
+"input":"<!DOCTYPE a PUBLIC''-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''/",
+"input":"<!DOCTYPE a PUBLIC''/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''0",
+"input":"<!DOCTYPE a PUBLIC''0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''1",
+"input":"<!DOCTYPE a PUBLIC''1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''9",
+"input":"<!DOCTYPE a PUBLIC''9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''<",
+"input":"<!DOCTYPE a PUBLIC''<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''=",
+"input":"<!DOCTYPE a PUBLIC''=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''>",
+"input":"<!DOCTYPE a PUBLIC''>",
+"output":["ParseError", ["DOCTYPE", "a", "", null, true]]},
+
+{"description":"<!DOCTYPE a PUBLIC''?",
+"input":"<!DOCTYPE a PUBLIC''?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''@",
+"input":"<!DOCTYPE a PUBLIC''@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''A",
+"input":"<!DOCTYPE a PUBLIC''A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''B",
+"input":"<!DOCTYPE a PUBLIC''B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''Y",
+"input":"<!DOCTYPE a PUBLIC''Y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''Z",
+"input":"<!DOCTYPE a PUBLIC''Z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''`",
+"input":"<!DOCTYPE a PUBLIC''`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''a",
+"input":"<!DOCTYPE a PUBLIC''a",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''b",
+"input":"<!DOCTYPE a PUBLIC''b",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''y",
+"input":"<!DOCTYPE a PUBLIC''y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''z",
+"input":"<!DOCTYPE a PUBLIC''z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''{",
+"input":"<!DOCTYPE a PUBLIC''{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC''\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a PUBLIC''\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'(",
+"input":"<!DOCTYPE a PUBLIC'(",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "(", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'-",
+"input":"<!DOCTYPE a PUBLIC'-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "-", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'/",
+"input":"<!DOCTYPE a PUBLIC'/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "/", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'0",
+"input":"<!DOCTYPE a PUBLIC'0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "0", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'1",
+"input":"<!DOCTYPE a PUBLIC'1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "1", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'9",
+"input":"<!DOCTYPE a PUBLIC'9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "9", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'<",
+"input":"<!DOCTYPE a PUBLIC'<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "<", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'=",
+"input":"<!DOCTYPE a PUBLIC'=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "=", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'>",
+"input":"<!DOCTYPE a PUBLIC'>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'?",
+"input":"<!DOCTYPE a PUBLIC'?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "?", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'@",
+"input":"<!DOCTYPE a PUBLIC'@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "@", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'A",
+"input":"<!DOCTYPE a PUBLIC'A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "A", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'B",
+"input":"<!DOCTYPE a PUBLIC'B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "B", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'Y",
+"input":"<!DOCTYPE a PUBLIC'Y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Y", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'Z",
+"input":"<!DOCTYPE a PUBLIC'Z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "Z", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'`",
+"input":"<!DOCTYPE a PUBLIC'`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "`", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'a",
+"input":"<!DOCTYPE a PUBLIC'a",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "a", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'b",
+"input":"<!DOCTYPE a PUBLIC'b",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "b", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'y",
+"input":"<!DOCTYPE a PUBLIC'y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "y", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'z",
+"input":"<!DOCTYPE a PUBLIC'z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "z", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'{",
+"input":"<!DOCTYPE a PUBLIC'{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "{", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC'\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a PUBLIC'\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC(",
+"input":"<!DOCTYPE a PUBLIC(",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC-",
+"input":"<!DOCTYPE a PUBLIC-",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC/",
+"input":"<!DOCTYPE a PUBLIC/",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC0",
+"input":"<!DOCTYPE a PUBLIC0",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC1",
+"input":"<!DOCTYPE a PUBLIC1",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC9",
+"input":"<!DOCTYPE a PUBLIC9",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC<",
+"input":"<!DOCTYPE a PUBLIC<",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC=",
+"input":"<!DOCTYPE a PUBLIC=",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC>",
+"input":"<!DOCTYPE a PUBLIC>",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC?",
+"input":"<!DOCTYPE a PUBLIC?",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC@",
+"input":"<!DOCTYPE a PUBLIC@",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLICA",
+"input":"<!DOCTYPE a PUBLICA",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLICB",
+"input":"<!DOCTYPE a PUBLICB",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLICY",
+"input":"<!DOCTYPE a PUBLICY",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLICZ",
+"input":"<!DOCTYPE a PUBLICZ",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC`",
+"input":"<!DOCTYPE a PUBLIC`",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLICa",
+"input":"<!DOCTYPE a PUBLICa",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLICb",
+"input":"<!DOCTYPE a PUBLICb",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLICy",
+"input":"<!DOCTYPE a PUBLICy",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLICz",
+"input":"<!DOCTYPE a PUBLICz",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC{",
+"input":"<!DOCTYPE a PUBLIC{",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a PUBLIC\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a PUBLIC\uDBC0\uDC00",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM",
+"input":"<!DOCTYPE a SYSTEM",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u0000",
+"input":"<!DOCTYPE a SYSTEM\u0000",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u0008",
+"input":"<!DOCTYPE a SYSTEM\u0008",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u0009",
+"input":"<!DOCTYPE a SYSTEM\u0009",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u000A",
+"input":"<!DOCTYPE a SYSTEM\u000A",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u000B",
+"input":"<!DOCTYPE a SYSTEM\u000B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u000C",
+"input":"<!DOCTYPE a SYSTEM\u000C",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u000D",
+"input":"<!DOCTYPE a SYSTEM\u000D",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\\u001F",
+"input":"<!DOCTYPE a SYSTEM\u001F",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM ",
+"input":"<!DOCTYPE a SYSTEM ",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM!",
+"input":"<!DOCTYPE a SYSTEM!",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"",
+"input":"<!DOCTYPE a SYSTEM\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\u0000",
+"input":"<!DOCTYPE a SYSTEM\"\u0000",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\u0009",
+"input":"<!DOCTYPE a SYSTEM\"\u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\u000A",
+"input":"<!DOCTYPE a SYSTEM\"\u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\u000B",
+"input":"<!DOCTYPE a SYSTEM\"\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\u000C",
+"input":"<!DOCTYPE a SYSTEM\"\u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\" ",
+"input":"<!DOCTYPE a SYSTEM\" ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, " ", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"!",
+"input":"<!DOCTYPE a SYSTEM\"!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "!", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\"",
+"input":"<!DOCTYPE a SYSTEM\"\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"#",
+"input":"<!DOCTYPE a SYSTEM\"#",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "#", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"&",
+"input":"<!DOCTYPE a SYSTEM\"&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "&", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"'",
+"input":"<!DOCTYPE a SYSTEM\"'",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "'", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"-",
+"input":"<!DOCTYPE a SYSTEM\"-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "-", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"/",
+"input":"<!DOCTYPE a SYSTEM\"/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "/", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"0",
+"input":"<!DOCTYPE a SYSTEM\"0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "0", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"1",
+"input":"<!DOCTYPE a SYSTEM\"1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "1", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"9",
+"input":"<!DOCTYPE a SYSTEM\"9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "9", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"<",
+"input":"<!DOCTYPE a SYSTEM\"<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "<", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"=",
+"input":"<!DOCTYPE a SYSTEM\"=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "=", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\">",
+"input":"<!DOCTYPE a SYSTEM\">",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"?",
+"input":"<!DOCTYPE a SYSTEM\"?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "?", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"@",
+"input":"<!DOCTYPE a SYSTEM\"@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "@", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"A",
+"input":"<!DOCTYPE a SYSTEM\"A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "A", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"B",
+"input":"<!DOCTYPE a SYSTEM\"B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "B", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"Y",
+"input":"<!DOCTYPE a SYSTEM\"Y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Y", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"Z",
+"input":"<!DOCTYPE a SYSTEM\"Z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Z", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"`",
+"input":"<!DOCTYPE a SYSTEM\"`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "`", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"a",
+"input":"<!DOCTYPE a SYSTEM\"a",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "a", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"b",
+"input":"<!DOCTYPE a SYSTEM\"b",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "b", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"y",
+"input":"<!DOCTYPE a SYSTEM\"y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "y", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"z",
+"input":"<!DOCTYPE a SYSTEM\"z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "z", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"{",
+"input":"<!DOCTYPE a SYSTEM\"{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "{", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\"\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a SYSTEM\"\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM#",
+"input":"<!DOCTYPE a SYSTEM#",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM&",
+"input":"<!DOCTYPE a SYSTEM&",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'",
+"input":"<!DOCTYPE a SYSTEM'",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\u0000",
+"input":"<!DOCTYPE a SYSTEM'\u0000",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\u0009",
+"input":"<!DOCTYPE a SYSTEM'\u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\u000A",
+"input":"<!DOCTYPE a SYSTEM'\u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\u000B",
+"input":"<!DOCTYPE a SYSTEM'\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\u000C",
+"input":"<!DOCTYPE a SYSTEM'\u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM' ",
+"input":"<!DOCTYPE a SYSTEM' ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, " ", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'!",
+"input":"<!DOCTYPE a SYSTEM'!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "!", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'\"",
+"input":"<!DOCTYPE a SYSTEM'\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\"", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'&",
+"input":"<!DOCTYPE a SYSTEM'&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "&", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM''",
+"input":"<!DOCTYPE a SYSTEM''",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u0000",
+"input":"<!DOCTYPE a SYSTEM''\u0000",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u0008",
+"input":"<!DOCTYPE a SYSTEM''\u0008",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u0009",
+"input":"<!DOCTYPE a SYSTEM''\u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u000A",
+"input":"<!DOCTYPE a SYSTEM''\u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u000B",
+"input":"<!DOCTYPE a SYSTEM''\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u000C",
+"input":"<!DOCTYPE a SYSTEM''\u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u000D",
+"input":"<!DOCTYPE a SYSTEM''\u000D",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\u001F",
+"input":"<!DOCTYPE a SYSTEM''\u001F",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM'' ",
+"input":"<!DOCTYPE a SYSTEM'' ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM''!",
+"input":"<!DOCTYPE a SYSTEM''!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''\"",
+"input":"<!DOCTYPE a SYSTEM''\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''&",
+"input":"<!DOCTYPE a SYSTEM''&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM'''",
+"input":"<!DOCTYPE a SYSTEM'''",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''-",
+"input":"<!DOCTYPE a SYSTEM''-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''/",
+"input":"<!DOCTYPE a SYSTEM''/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''0",
+"input":"<!DOCTYPE a SYSTEM''0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''1",
+"input":"<!DOCTYPE a SYSTEM''1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''9",
+"input":"<!DOCTYPE a SYSTEM''9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''<",
+"input":"<!DOCTYPE a SYSTEM''<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''=",
+"input":"<!DOCTYPE a SYSTEM''=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''>",
+"input":"<!DOCTYPE a SYSTEM''>",
+"output":["ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''?",
+"input":"<!DOCTYPE a SYSTEM''?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''@",
+"input":"<!DOCTYPE a SYSTEM''@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''A",
+"input":"<!DOCTYPE a SYSTEM''A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''B",
+"input":"<!DOCTYPE a SYSTEM''B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''Y",
+"input":"<!DOCTYPE a SYSTEM''Y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''Z",
+"input":"<!DOCTYPE a SYSTEM''Z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''`",
+"input":"<!DOCTYPE a SYSTEM''`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''a",
+"input":"<!DOCTYPE a SYSTEM''a",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''b",
+"input":"<!DOCTYPE a SYSTEM''b",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''y",
+"input":"<!DOCTYPE a SYSTEM''y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''z",
+"input":"<!DOCTYPE a SYSTEM''z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''{",
+"input":"<!DOCTYPE a SYSTEM''{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM''\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a SYSTEM''\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPE a SYSTEM'(",
+"input":"<!DOCTYPE a SYSTEM'(",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "(", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'-",
+"input":"<!DOCTYPE a SYSTEM'-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "-", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'/",
+"input":"<!DOCTYPE a SYSTEM'/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "/", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'0",
+"input":"<!DOCTYPE a SYSTEM'0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "0", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'1",
+"input":"<!DOCTYPE a SYSTEM'1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "1", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'9",
+"input":"<!DOCTYPE a SYSTEM'9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "9", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'<",
+"input":"<!DOCTYPE a SYSTEM'<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "<", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'=",
+"input":"<!DOCTYPE a SYSTEM'=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "=", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'>",
+"input":"<!DOCTYPE a SYSTEM'>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'?",
+"input":"<!DOCTYPE a SYSTEM'?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "?", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'@",
+"input":"<!DOCTYPE a SYSTEM'@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "@", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'A",
+"input":"<!DOCTYPE a SYSTEM'A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "A", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'B",
+"input":"<!DOCTYPE a SYSTEM'B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "B", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'Y",
+"input":"<!DOCTYPE a SYSTEM'Y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Y", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'Z",
+"input":"<!DOCTYPE a SYSTEM'Z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "Z", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'`",
+"input":"<!DOCTYPE a SYSTEM'`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "`", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'a",
+"input":"<!DOCTYPE a SYSTEM'a",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "a", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'b",
+"input":"<!DOCTYPE a SYSTEM'b",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "b", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'y",
+"input":"<!DOCTYPE a SYSTEM'y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "y", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'z",
+"input":"<!DOCTYPE a SYSTEM'z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "z", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'{",
+"input":"<!DOCTYPE a SYSTEM'{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "{", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM'\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a SYSTEM'\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},
+
+{"description":"<!DOCTYPE a SYSTEM(",
+"input":"<!DOCTYPE a SYSTEM(",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM-",
+"input":"<!DOCTYPE a SYSTEM-",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM/",
+"input":"<!DOCTYPE a SYSTEM/",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM0",
+"input":"<!DOCTYPE a SYSTEM0",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM1",
+"input":"<!DOCTYPE a SYSTEM1",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM9",
+"input":"<!DOCTYPE a SYSTEM9",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM<",
+"input":"<!DOCTYPE a SYSTEM<",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM=",
+"input":"<!DOCTYPE a SYSTEM=",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM>",
+"input":"<!DOCTYPE a SYSTEM>",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM?",
+"input":"<!DOCTYPE a SYSTEM?",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM@",
+"input":"<!DOCTYPE a SYSTEM@",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEMA",
+"input":"<!DOCTYPE a SYSTEMA",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEMB",
+"input":"<!DOCTYPE a SYSTEMB",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEMY",
+"input":"<!DOCTYPE a SYSTEMY",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEMZ",
+"input":"<!DOCTYPE a SYSTEMZ",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM`",
+"input":"<!DOCTYPE a SYSTEM`",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEMa",
+"input":"<!DOCTYPE a SYSTEMa",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEMb",
+"input":"<!DOCTYPE a SYSTEMb",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEMy",
+"input":"<!DOCTYPE a SYSTEMy",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEMz",
+"input":"<!DOCTYPE a SYSTEMz",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM{",
+"input":"<!DOCTYPE a SYSTEM{",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a SYSTEM\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a SYSTEM\uDBC0\uDC00",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a Y",
+"input":"<!DOCTYPE a Y",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a Z",
+"input":"<!DOCTYPE a Z",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a `",
+"input":"<!DOCTYPE a `",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a",
+"input":"<!DOCTYPE a a",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a\\u0000",
+"input":"<!DOCTYPE a a\u0000",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a\\u0009",
+"input":"<!DOCTYPE a a\u0009",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a\\u000A",
+"input":"<!DOCTYPE a a\u000A",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a\\u000B",
+"input":"<!DOCTYPE a a\u000B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a\\u000C",
+"input":"<!DOCTYPE a a\u000C",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a ",
+"input":"<!DOCTYPE a a ",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a!",
+"input":"<!DOCTYPE a a!",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a\"",
+"input":"<!DOCTYPE a a\"",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a&",
+"input":"<!DOCTYPE a a&",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a'",
+"input":"<!DOCTYPE a a'",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a-",
+"input":"<!DOCTYPE a a-",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a/",
+"input":"<!DOCTYPE a a/",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a0",
+"input":"<!DOCTYPE a a0",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a1",
+"input":"<!DOCTYPE a a1",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a9",
+"input":"<!DOCTYPE a a9",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a<",
+"input":"<!DOCTYPE a a<",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a=",
+"input":"<!DOCTYPE a a=",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a>",
+"input":"<!DOCTYPE a a>",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a?",
+"input":"<!DOCTYPE a a?",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a@",
+"input":"<!DOCTYPE a a@",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a aA",
+"input":"<!DOCTYPE a aA",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a aB",
+"input":"<!DOCTYPE a aB",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a aY",
+"input":"<!DOCTYPE a aY",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a aZ",
+"input":"<!DOCTYPE a aZ",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a`",
+"input":"<!DOCTYPE a a`",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a aa",
+"input":"<!DOCTYPE a aa",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a ab",
+"input":"<!DOCTYPE a ab",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a ay",
+"input":"<!DOCTYPE a ay",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a az",
+"input":"<!DOCTYPE a az",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a{",
+"input":"<!DOCTYPE a a{",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a a\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a a\uDBC0\uDC00",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a b",
+"input":"<!DOCTYPE a b",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a y",
+"input":"<!DOCTYPE a y",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a z",
+"input":"<!DOCTYPE a z",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a {",
+"input":"<!DOCTYPE a {",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a \\uDBC0\\uDC00",
+"input":"<!DOCTYPE a \uDBC0\uDC00",
+"output":["ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPE a!",
+"input":"<!DOCTYPE a!",
+"output":["ParseError", ["DOCTYPE", "a!", null, null, false]]},
+
+{"description":"<!DOCTYPE a\"",
+"input":"<!DOCTYPE a\"",
+"output":["ParseError", ["DOCTYPE", "a\"", null, null, false]]},
+
+{"description":"<!DOCTYPE a&",
+"input":"<!DOCTYPE a&",
+"output":["ParseError", ["DOCTYPE", "a&", null, null, false]]},
+
+{"description":"<!DOCTYPE a'",
+"input":"<!DOCTYPE a'",
+"output":["ParseError", ["DOCTYPE", "a'", null, null, false]]},
+
+{"description":"<!DOCTYPE a-",
+"input":"<!DOCTYPE a-",
+"output":["ParseError", ["DOCTYPE", "a-", null, null, false]]},
+
+{"description":"<!DOCTYPE a/",
+"input":"<!DOCTYPE a/",
+"output":["ParseError", ["DOCTYPE", "a/", null, null, false]]},
+
+{"description":"<!DOCTYPE a0",
+"input":"<!DOCTYPE a0",
+"output":["ParseError", ["DOCTYPE", "a0", null, null, false]]},
+
+{"description":"<!DOCTYPE a1",
+"input":"<!DOCTYPE a1",
+"output":["ParseError", ["DOCTYPE", "a1", null, null, false]]},
+
+{"description":"<!DOCTYPE a9",
+"input":"<!DOCTYPE a9",
+"output":["ParseError", ["DOCTYPE", "a9", null, null, false]]},
+
+{"description":"<!DOCTYPE a<",
+"input":"<!DOCTYPE a<",
+"output":["ParseError", ["DOCTYPE", "a<", null, null, false]]},
+
+{"description":"<!DOCTYPE a=",
+"input":"<!DOCTYPE a=",
+"output":["ParseError", ["DOCTYPE", "a=", null, null, false]]},
+
+{"description":"<!DOCTYPE a>",
+"input":"<!DOCTYPE a>",
+"output":[["DOCTYPE", "a", null, null, true]]},
+
+{"description":"<!DOCTYPE a?",
+"input":"<!DOCTYPE a?",
+"output":["ParseError", ["DOCTYPE", "a?", null, null, false]]},
+
+{"description":"<!DOCTYPE a@",
+"input":"<!DOCTYPE a@",
+"output":["ParseError", ["DOCTYPE", "a@", null, null, false]]},
+
+{"description":"<!DOCTYPE aA",
+"input":"<!DOCTYPE aA",
+"output":["ParseError", ["DOCTYPE", "aa", null, null, false]]},
+
+{"description":"<!DOCTYPE aB",
+"input":"<!DOCTYPE aB",
+"output":["ParseError", ["DOCTYPE", "ab", null, null, false]]},
+
+{"description":"<!DOCTYPE aY",
+"input":"<!DOCTYPE aY",
+"output":["ParseError", ["DOCTYPE", "ay", null, null, false]]},
+
+{"description":"<!DOCTYPE aZ",
+"input":"<!DOCTYPE aZ",
+"output":["ParseError", ["DOCTYPE", "az", null, null, false]]},
+
+{"description":"<!DOCTYPE a[",
+"input":"<!DOCTYPE a[",
+"output":["ParseError", ["DOCTYPE", "a[", null, null, false]]},
+
+{"description":"<!DOCTYPE a`",
+"input":"<!DOCTYPE a`",
+"output":["ParseError", ["DOCTYPE", "a`", null, null, false]]},
+
+{"description":"<!DOCTYPE aa",
+"input":"<!DOCTYPE aa",
+"output":["ParseError", ["DOCTYPE", "aa", null, null, false]]},
+
+{"description":"<!DOCTYPE ab",
+"input":"<!DOCTYPE ab",
+"output":["ParseError", ["DOCTYPE", "ab", null, null, false]]},
+
+{"description":"<!DOCTYPE ay",
+"input":"<!DOCTYPE ay",
+"output":["ParseError", ["DOCTYPE", "ay", null, null, false]]},
+
+{"description":"<!DOCTYPE az",
+"input":"<!DOCTYPE az",
+"output":["ParseError", ["DOCTYPE", "az", null, null, false]]},
+
+{"description":"<!DOCTYPE a{",
+"input":"<!DOCTYPE a{",
+"output":["ParseError", ["DOCTYPE", "a{", null, null, false]]},
+
+{"description":"<!DOCTYPE a\\uDBC0\\uDC00",
+"input":"<!DOCTYPE a\uDBC0\uDC00",
+"output":["ParseError", ["DOCTYPE", "a\uDBC0\uDC00", null, null, false]]},
+
+{"description":"<!DOCTYPE b",
+"input":"<!DOCTYPE b",
+"output":["ParseError", ["DOCTYPE", "b", null, null, false]]},
+
+{"description":"<!DOCTYPE y",
+"input":"<!DOCTYPE y",
+"output":["ParseError", ["DOCTYPE", "y", null, null, false]]},
+
+{"description":"<!DOCTYPE z",
+"input":"<!DOCTYPE z",
+"output":["ParseError", ["DOCTYPE", "z", null, null, false]]},
+
+{"description":"<!DOCTYPE {",
+"input":"<!DOCTYPE {",
+"output":["ParseError", ["DOCTYPE", "{", null, null, false]]},
+
+{"description":"<!DOCTYPE \\uDBC0\\uDC00",
+"input":"<!DOCTYPE \uDBC0\uDC00",
+"output":["ParseError", ["DOCTYPE", "\uDBC0\uDC00", null, null, false]]},
+
+{"description":"<!DOCTYPE!",
+"input":"<!DOCTYPE!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "!", null, null, false]]},
+
+{"description":"<!DOCTYPE\"",
+"input":"<!DOCTYPE\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "\"", null, null, false]]},
+
+{"description":"<!DOCTYPE&",
+"input":"<!DOCTYPE&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "&", null, null, false]]},
+
+{"description":"<!DOCTYPE'",
+"input":"<!DOCTYPE'",
+"output":["ParseError", "ParseError", ["DOCTYPE", "'", null, null, false]]},
+
+{"description":"<!DOCTYPE-",
+"input":"<!DOCTYPE-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "-", null, null, false]]},
+
+{"description":"<!DOCTYPE/",
+"input":"<!DOCTYPE/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "/", null, null, false]]},
+
+{"description":"<!DOCTYPE0",
+"input":"<!DOCTYPE0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "0", null, null, false]]},
+
+{"description":"<!DOCTYPE1",
+"input":"<!DOCTYPE1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "1", null, null, false]]},
+
+{"description":"<!DOCTYPE9",
+"input":"<!DOCTYPE9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "9", null, null, false]]},
+
+{"description":"<!DOCTYPE<",
+"input":"<!DOCTYPE<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "<", null, null, false]]},
+
+{"description":"<!DOCTYPE=",
+"input":"<!DOCTYPE=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "=", null, null, false]]},
+
+{"description":"<!DOCTYPE>",
+"input":"<!DOCTYPE>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "", null, null, false]]},
+
+{"description":"<!DOCTYPE?",
+"input":"<!DOCTYPE?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "?", null, null, false]]},
+
+{"description":"<!DOCTYPE@",
+"input":"<!DOCTYPE@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "@", null, null, false]]},
+
+{"description":"<!DOCTYPEA",
+"input":"<!DOCTYPEA",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEB",
+"input":"<!DOCTYPEB",
+"output":["ParseError", "ParseError", ["DOCTYPE", "b", null, null, false]]},
+
+{"description":"<!DOCTYPEY",
+"input":"<!DOCTYPEY",
+"output":["ParseError", "ParseError", ["DOCTYPE", "y", null, null, false]]},
+
+{"description":"<!DOCTYPEZ",
+"input":"<!DOCTYPEZ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "z", null, null, false]]},
+
+{"description":"<!DOCTYPE`",
+"input":"<!DOCTYPE`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "`", null, null, false]]},
+
+{"description":"<!DOCTYPEa",
+"input":"<!DOCTYPEa",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa\\u0000",
+"input":"<!DOCTYPEa\u0000",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a\uFFFD", null, null, false]]},
+
+{"description":"<!DOCTYPEa\\u0008",
+"input":"<!DOCTYPEa\u0008",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a\u0008", null, null, false]]},
+
+{"description":"<!DOCTYPEa\\u0009",
+"input":"<!DOCTYPEa\u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa\\u000A",
+"input":"<!DOCTYPEa\u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa\\u000B",
+"input":"<!DOCTYPEa\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a\u000B", null, null, false]]},
+
+{"description":"<!DOCTYPEa\\u000C",
+"input":"<!DOCTYPEa\u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa\\u000D",
+"input":"<!DOCTYPEa\u000D",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa\\u001F",
+"input":"<!DOCTYPEa\u001F",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a\u001F", null, null, false]]},
+
+{"description":"<!DOCTYPEa ",
+"input":"<!DOCTYPEa ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa \\u0000",
+"input":"<!DOCTYPEa \u0000",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa \\u0008",
+"input":"<!DOCTYPEa \u0008",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa \\u0009",
+"input":"<!DOCTYPEa \u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa \\u000A",
+"input":"<!DOCTYPEa \u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa \\u000B",
+"input":"<!DOCTYPEa \u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa \\u000C",
+"input":"<!DOCTYPEa \u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa \\u000D",
+"input":"<!DOCTYPEa \u000D",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa \\u001F",
+"input":"<!DOCTYPEa \u001F",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa  ",
+"input":"<!DOCTYPEa  ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa !",
+"input":"<!DOCTYPEa !",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa \"",
+"input":"<!DOCTYPEa \"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa &",
+"input":"<!DOCTYPEa &",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa '",
+"input":"<!DOCTYPEa '",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa -",
+"input":"<!DOCTYPEa -",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa /",
+"input":"<!DOCTYPEa /",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa 0",
+"input":"<!DOCTYPEa 0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa 1",
+"input":"<!DOCTYPEa 1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa 9",
+"input":"<!DOCTYPEa 9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa <",
+"input":"<!DOCTYPEa <",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa =",
+"input":"<!DOCTYPEa =",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa >",
+"input":"<!DOCTYPEa >",
+"output":["ParseError", ["DOCTYPE", "a", null, null, true]]},
+
+{"description":"<!DOCTYPEa ?",
+"input":"<!DOCTYPEa ?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa @",
+"input":"<!DOCTYPEa @",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa A",
+"input":"<!DOCTYPEa A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa B",
+"input":"<!DOCTYPEa B",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC",
+"input":"<!DOCTYPEa PUBLIC",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u0000",
+"input":"<!DOCTYPEa PUBLIC\u0000",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u0008",
+"input":"<!DOCTYPEa PUBLIC\u0008",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u0009",
+"input":"<!DOCTYPEa PUBLIC\u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u000A",
+"input":"<!DOCTYPEa PUBLIC\u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u000B",
+"input":"<!DOCTYPEa PUBLIC\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u000C",
+"input":"<!DOCTYPEa PUBLIC\u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u000D",
+"input":"<!DOCTYPEa PUBLIC\u000D",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\\u001F",
+"input":"<!DOCTYPEa PUBLIC\u001F",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC ",
+"input":"<!DOCTYPEa PUBLIC ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC!",
+"input":"<!DOCTYPEa PUBLIC!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"",
+"input":"<!DOCTYPEa PUBLIC\"",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\u0000",
+"input":"<!DOCTYPEa PUBLIC\"\u0000",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\u0009",
+"input":"<!DOCTYPEa PUBLIC\"\u0009",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\u000A",
+"input":"<!DOCTYPEa PUBLIC\"\u000A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\u000B",
+"input":"<!DOCTYPEa PUBLIC\"\u000B",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\u000C",
+"input":"<!DOCTYPEa PUBLIC\"\u000C",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\" ",
+"input":"<!DOCTYPEa PUBLIC\" ",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", " ", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"!",
+"input":"<!DOCTYPEa PUBLIC\"!",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "!", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\"",
+"input":"<!DOCTYPEa PUBLIC\"\"",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"#",
+"input":"<!DOCTYPEa PUBLIC\"#",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "#", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"&",
+"input":"<!DOCTYPEa PUBLIC\"&",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "&", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"'",
+"input":"<!DOCTYPEa PUBLIC\"'",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "'", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"-",
+"input":"<!DOCTYPEa PUBLIC\"-",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "-", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"/",
+"input":"<!DOCTYPEa PUBLIC\"/",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "/", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"0",
+"input":"<!DOCTYPEa PUBLIC\"0",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "0", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"1",
+"input":"<!DOCTYPEa PUBLIC\"1",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "1", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"9",
+"input":"<!DOCTYPEa PUBLIC\"9",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "9", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"<",
+"input":"<!DOCTYPEa PUBLIC\"<",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "<", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"=",
+"input":"<!DOCTYPEa PUBLIC\"=",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "=", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\">",
+"input":"<!DOCTYPEa PUBLIC\">",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"?",
+"input":"<!DOCTYPEa PUBLIC\"?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "?", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"@",
+"input":"<!DOCTYPEa PUBLIC\"@",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "@", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"A",
+"input":"<!DOCTYPEa PUBLIC\"A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "A", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"B",
+"input":"<!DOCTYPEa PUBLIC\"B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "B", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"Y",
+"input":"<!DOCTYPEa PUBLIC\"Y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "Y", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"Z",
+"input":"<!DOCTYPEa PUBLIC\"Z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "Z", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"`",
+"input":"<!DOCTYPEa PUBLIC\"`",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "`", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"a",
+"input":"<!DOCTYPEa PUBLIC\"a",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "a", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"b",
+"input":"<!DOCTYPEa PUBLIC\"b",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "b", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"y",
+"input":"<!DOCTYPEa PUBLIC\"y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "y", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"z",
+"input":"<!DOCTYPEa PUBLIC\"z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "z", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"{",
+"input":"<!DOCTYPEa PUBLIC\"{",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "{", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\"\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa PUBLIC\"\uDBC0\uDC00",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC#",
+"input":"<!DOCTYPEa PUBLIC#",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC&",
+"input":"<!DOCTYPEa PUBLIC&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'",
+"input":"<!DOCTYPEa PUBLIC'",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\u0000",
+"input":"<!DOCTYPEa PUBLIC'\u0000",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uFFFD", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\u0009",
+"input":"<!DOCTYPEa PUBLIC'\u0009",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u0009", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\u000A",
+"input":"<!DOCTYPEa PUBLIC'\u000A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000A", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\u000B",
+"input":"<!DOCTYPEa PUBLIC'\u000B",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000B", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\u000C",
+"input":"<!DOCTYPEa PUBLIC'\u000C",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\u000C", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC' ",
+"input":"<!DOCTYPEa PUBLIC' ",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", " ", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'!",
+"input":"<!DOCTYPEa PUBLIC'!",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "!", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'\"",
+"input":"<!DOCTYPEa PUBLIC'\"",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\"", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'&",
+"input":"<!DOCTYPEa PUBLIC'&",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "&", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''",
+"input":"<!DOCTYPEa PUBLIC''",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u0000",
+"input":"<!DOCTYPEa PUBLIC''\u0000",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u0008",
+"input":"<!DOCTYPEa PUBLIC''\u0008",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u0009",
+"input":"<!DOCTYPEa PUBLIC''\u0009",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u000A",
+"input":"<!DOCTYPEa PUBLIC''\u000A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u000B",
+"input":"<!DOCTYPEa PUBLIC''\u000B",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u000C",
+"input":"<!DOCTYPEa PUBLIC''\u000C",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u000D",
+"input":"<!DOCTYPEa PUBLIC''\u000D",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\u001F",
+"input":"<!DOCTYPEa PUBLIC''\u001F",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'' ",
+"input":"<!DOCTYPEa PUBLIC'' ",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''!",
+"input":"<!DOCTYPEa PUBLIC''!",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''\"",
+"input":"<!DOCTYPEa PUBLIC''\"",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", "", false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''#",
+"input":"<!DOCTYPEa PUBLIC''#",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''&",
+"input":"<!DOCTYPEa PUBLIC''&",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'''",
+"input":"<!DOCTYPEa PUBLIC'''",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", "", false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''(",
+"input":"<!DOCTYPEa PUBLIC''(",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''-",
+"input":"<!DOCTYPEa PUBLIC''-",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''/",
+"input":"<!DOCTYPEa PUBLIC''/",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''0",
+"input":"<!DOCTYPEa PUBLIC''0",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''1",
+"input":"<!DOCTYPEa PUBLIC''1",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''9",
+"input":"<!DOCTYPEa PUBLIC''9",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''<",
+"input":"<!DOCTYPEa PUBLIC''<",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''=",
+"input":"<!DOCTYPEa PUBLIC''=",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''>",
+"input":"<!DOCTYPEa PUBLIC''>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, true]]},
+
+{"description":"<!DOCTYPEa PUBLIC''?",
+"input":"<!DOCTYPEa PUBLIC''?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''@",
+"input":"<!DOCTYPEa PUBLIC''@",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''A",
+"input":"<!DOCTYPEa PUBLIC''A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''B",
+"input":"<!DOCTYPEa PUBLIC''B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''Y",
+"input":"<!DOCTYPEa PUBLIC''Y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''Z",
+"input":"<!DOCTYPEa PUBLIC''Z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''`",
+"input":"<!DOCTYPEa PUBLIC''`",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''a",
+"input":"<!DOCTYPEa PUBLIC''a",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''b",
+"input":"<!DOCTYPEa PUBLIC''b",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''y",
+"input":"<!DOCTYPEa PUBLIC''y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''z",
+"input":"<!DOCTYPEa PUBLIC''z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''{",
+"input":"<!DOCTYPEa PUBLIC''{",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC''\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa PUBLIC''\uDBC0\uDC00",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'(",
+"input":"<!DOCTYPEa PUBLIC'(",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "(", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'-",
+"input":"<!DOCTYPEa PUBLIC'-",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "-", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'/",
+"input":"<!DOCTYPEa PUBLIC'/",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "/", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'0",
+"input":"<!DOCTYPEa PUBLIC'0",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "0", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'1",
+"input":"<!DOCTYPEa PUBLIC'1",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "1", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'9",
+"input":"<!DOCTYPEa PUBLIC'9",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "9", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'<",
+"input":"<!DOCTYPEa PUBLIC'<",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "<", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'=",
+"input":"<!DOCTYPEa PUBLIC'=",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "=", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'>",
+"input":"<!DOCTYPEa PUBLIC'>",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'?",
+"input":"<!DOCTYPEa PUBLIC'?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "?", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'@",
+"input":"<!DOCTYPEa PUBLIC'@",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "@", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'A",
+"input":"<!DOCTYPEa PUBLIC'A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "A", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'B",
+"input":"<!DOCTYPEa PUBLIC'B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "B", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'Y",
+"input":"<!DOCTYPEa PUBLIC'Y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "Y", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'Z",
+"input":"<!DOCTYPEa PUBLIC'Z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "Z", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'`",
+"input":"<!DOCTYPEa PUBLIC'`",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "`", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'a",
+"input":"<!DOCTYPEa PUBLIC'a",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "a", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'b",
+"input":"<!DOCTYPEa PUBLIC'b",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "b", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'y",
+"input":"<!DOCTYPEa PUBLIC'y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "y", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'z",
+"input":"<!DOCTYPEa PUBLIC'z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "z", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'{",
+"input":"<!DOCTYPEa PUBLIC'{",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "{", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC'\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa PUBLIC'\uDBC0\uDC00",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "\uDBC0\uDC00", null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC(",
+"input":"<!DOCTYPEa PUBLIC(",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC-",
+"input":"<!DOCTYPEa PUBLIC-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC/",
+"input":"<!DOCTYPEa PUBLIC/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC0",
+"input":"<!DOCTYPEa PUBLIC0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC1",
+"input":"<!DOCTYPEa PUBLIC1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC9",
+"input":"<!DOCTYPEa PUBLIC9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC<",
+"input":"<!DOCTYPEa PUBLIC<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC=",
+"input":"<!DOCTYPEa PUBLIC=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC>",
+"input":"<!DOCTYPEa PUBLIC>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC?",
+"input":"<!DOCTYPEa PUBLIC?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC@",
+"input":"<!DOCTYPEa PUBLIC@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLICA",
+"input":"<!DOCTYPEa PUBLICA",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLICB",
+"input":"<!DOCTYPEa PUBLICB",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLICY",
+"input":"<!DOCTYPEa PUBLICY",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLICZ",
+"input":"<!DOCTYPEa PUBLICZ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC`",
+"input":"<!DOCTYPEa PUBLIC`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLICa",
+"input":"<!DOCTYPEa PUBLICa",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLICb",
+"input":"<!DOCTYPEa PUBLICb",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLICy",
+"input":"<!DOCTYPEa PUBLICy",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLICz",
+"input":"<!DOCTYPEa PUBLICz",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC{",
+"input":"<!DOCTYPEa PUBLIC{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa PUBLIC\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa PUBLIC\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM",
+"input":"<!DOCTYPEa SYSTEM",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u0000",
+"input":"<!DOCTYPEa SYSTEM\u0000",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u0008",
+"input":"<!DOCTYPEa SYSTEM\u0008",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u0009",
+"input":"<!DOCTYPEa SYSTEM\u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u000A",
+"input":"<!DOCTYPEa SYSTEM\u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u000B",
+"input":"<!DOCTYPEa SYSTEM\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u000C",
+"input":"<!DOCTYPEa SYSTEM\u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u000D",
+"input":"<!DOCTYPEa SYSTEM\u000D",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\\u001F",
+"input":"<!DOCTYPEa SYSTEM\u001F",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM ",
+"input":"<!DOCTYPEa SYSTEM ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM!",
+"input":"<!DOCTYPEa SYSTEM!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"",
+"input":"<!DOCTYPEa SYSTEM\"",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\u0000",
+"input":"<!DOCTYPEa SYSTEM\"\u0000",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\u0009",
+"input":"<!DOCTYPEa SYSTEM\"\u0009",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\u000A",
+"input":"<!DOCTYPEa SYSTEM\"\u000A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\u000B",
+"input":"<!DOCTYPEa SYSTEM\"\u000B",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\u000C",
+"input":"<!DOCTYPEa SYSTEM\"\u000C",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\" ",
+"input":"<!DOCTYPEa SYSTEM\" ",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, " ", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"!",
+"input":"<!DOCTYPEa SYSTEM\"!",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "!", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\"",
+"input":"<!DOCTYPEa SYSTEM\"\"",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"#",
+"input":"<!DOCTYPEa SYSTEM\"#",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "#", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"&",
+"input":"<!DOCTYPEa SYSTEM\"&",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "&", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"'",
+"input":"<!DOCTYPEa SYSTEM\"'",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "'", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"-",
+"input":"<!DOCTYPEa SYSTEM\"-",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "-", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"/",
+"input":"<!DOCTYPEa SYSTEM\"/",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "/", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"0",
+"input":"<!DOCTYPEa SYSTEM\"0",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "0", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"1",
+"input":"<!DOCTYPEa SYSTEM\"1",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "1", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"9",
+"input":"<!DOCTYPEa SYSTEM\"9",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "9", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"<",
+"input":"<!DOCTYPEa SYSTEM\"<",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "<", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"=",
+"input":"<!DOCTYPEa SYSTEM\"=",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "=", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\">",
+"input":"<!DOCTYPEa SYSTEM\">",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"?",
+"input":"<!DOCTYPEa SYSTEM\"?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "?", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"@",
+"input":"<!DOCTYPEa SYSTEM\"@",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "@", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"A",
+"input":"<!DOCTYPEa SYSTEM\"A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "A", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"B",
+"input":"<!DOCTYPEa SYSTEM\"B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "B", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"Y",
+"input":"<!DOCTYPEa SYSTEM\"Y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "Y", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"Z",
+"input":"<!DOCTYPEa SYSTEM\"Z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "Z", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"`",
+"input":"<!DOCTYPEa SYSTEM\"`",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "`", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"a",
+"input":"<!DOCTYPEa SYSTEM\"a",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "a", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"b",
+"input":"<!DOCTYPEa SYSTEM\"b",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "b", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"y",
+"input":"<!DOCTYPEa SYSTEM\"y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "y", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"z",
+"input":"<!DOCTYPEa SYSTEM\"z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "z", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"{",
+"input":"<!DOCTYPEa SYSTEM\"{",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "{", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\"\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa SYSTEM\"\uDBC0\uDC00",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM#",
+"input":"<!DOCTYPEa SYSTEM#",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM&",
+"input":"<!DOCTYPEa SYSTEM&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'",
+"input":"<!DOCTYPEa SYSTEM'",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\u0000",
+"input":"<!DOCTYPEa SYSTEM'\u0000",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uFFFD", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\u0009",
+"input":"<!DOCTYPEa SYSTEM'\u0009",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u0009", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\u000A",
+"input":"<!DOCTYPEa SYSTEM'\u000A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000A", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\u000B",
+"input":"<!DOCTYPEa SYSTEM'\u000B",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000B", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\u000C",
+"input":"<!DOCTYPEa SYSTEM'\u000C",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\u000C", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM' ",
+"input":"<!DOCTYPEa SYSTEM' ",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, " ", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'!",
+"input":"<!DOCTYPEa SYSTEM'!",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "!", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'\"",
+"input":"<!DOCTYPEa SYSTEM'\"",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\"", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'&",
+"input":"<!DOCTYPEa SYSTEM'&",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "&", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM''",
+"input":"<!DOCTYPEa SYSTEM''",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u0000",
+"input":"<!DOCTYPEa SYSTEM''\u0000",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u0008",
+"input":"<!DOCTYPEa SYSTEM''\u0008",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u0009",
+"input":"<!DOCTYPEa SYSTEM''\u0009",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u000A",
+"input":"<!DOCTYPEa SYSTEM''\u000A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u000B",
+"input":"<!DOCTYPEa SYSTEM''\u000B",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u000C",
+"input":"<!DOCTYPEa SYSTEM''\u000C",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u000D",
+"input":"<!DOCTYPEa SYSTEM''\u000D",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\u001F",
+"input":"<!DOCTYPEa SYSTEM''\u001F",
+"output":["ParseError", "ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM'' ",
+"input":"<!DOCTYPEa SYSTEM'' ",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM''!",
+"input":"<!DOCTYPEa SYSTEM''!",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''\"",
+"input":"<!DOCTYPEa SYSTEM''\"",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''&",
+"input":"<!DOCTYPEa SYSTEM''&",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM'''",
+"input":"<!DOCTYPEa SYSTEM'''",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''-",
+"input":"<!DOCTYPEa SYSTEM''-",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''/",
+"input":"<!DOCTYPEa SYSTEM''/",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''0",
+"input":"<!DOCTYPEa SYSTEM''0",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''1",
+"input":"<!DOCTYPEa SYSTEM''1",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''9",
+"input":"<!DOCTYPEa SYSTEM''9",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''<",
+"input":"<!DOCTYPEa SYSTEM''<",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''=",
+"input":"<!DOCTYPEa SYSTEM''=",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''>",
+"input":"<!DOCTYPEa SYSTEM''>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''?",
+"input":"<!DOCTYPEa SYSTEM''?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''@",
+"input":"<!DOCTYPEa SYSTEM''@",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''A",
+"input":"<!DOCTYPEa SYSTEM''A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''B",
+"input":"<!DOCTYPEa SYSTEM''B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''Y",
+"input":"<!DOCTYPEa SYSTEM''Y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''Z",
+"input":"<!DOCTYPEa SYSTEM''Z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''`",
+"input":"<!DOCTYPEa SYSTEM''`",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''a",
+"input":"<!DOCTYPEa SYSTEM''a",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''b",
+"input":"<!DOCTYPEa SYSTEM''b",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''y",
+"input":"<!DOCTYPEa SYSTEM''y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''z",
+"input":"<!DOCTYPEa SYSTEM''z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''{",
+"input":"<!DOCTYPEa SYSTEM''{",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM''\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa SYSTEM''\uDBC0\uDC00",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]},
+
+{"description":"<!DOCTYPEa SYSTEM'(",
+"input":"<!DOCTYPEa SYSTEM'(",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "(", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'-",
+"input":"<!DOCTYPEa SYSTEM'-",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "-", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'/",
+"input":"<!DOCTYPEa SYSTEM'/",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "/", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'0",
+"input":"<!DOCTYPEa SYSTEM'0",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "0", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'1",
+"input":"<!DOCTYPEa SYSTEM'1",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "1", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'9",
+"input":"<!DOCTYPEa SYSTEM'9",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "9", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'<",
+"input":"<!DOCTYPEa SYSTEM'<",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "<", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'=",
+"input":"<!DOCTYPEa SYSTEM'=",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "=", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'>",
+"input":"<!DOCTYPEa SYSTEM'>",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'?",
+"input":"<!DOCTYPEa SYSTEM'?",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "?", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'@",
+"input":"<!DOCTYPEa SYSTEM'@",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "@", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'A",
+"input":"<!DOCTYPEa SYSTEM'A",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "A", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'B",
+"input":"<!DOCTYPEa SYSTEM'B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "B", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'Y",
+"input":"<!DOCTYPEa SYSTEM'Y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "Y", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'Z",
+"input":"<!DOCTYPEa SYSTEM'Z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "Z", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'`",
+"input":"<!DOCTYPEa SYSTEM'`",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "`", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'a",
+"input":"<!DOCTYPEa SYSTEM'a",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "a", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'b",
+"input":"<!DOCTYPEa SYSTEM'b",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "b", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'y",
+"input":"<!DOCTYPEa SYSTEM'y",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "y", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'z",
+"input":"<!DOCTYPEa SYSTEM'z",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "z", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'{",
+"input":"<!DOCTYPEa SYSTEM'{",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "{", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM'\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa SYSTEM'\uDBC0\uDC00",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "\uDBC0\uDC00", false]]},
+
+{"description":"<!DOCTYPEa SYSTEM(",
+"input":"<!DOCTYPEa SYSTEM(",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM-",
+"input":"<!DOCTYPEa SYSTEM-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM/",
+"input":"<!DOCTYPEa SYSTEM/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM0",
+"input":"<!DOCTYPEa SYSTEM0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM1",
+"input":"<!DOCTYPEa SYSTEM1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM9",
+"input":"<!DOCTYPEa SYSTEM9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM<",
+"input":"<!DOCTYPEa SYSTEM<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM=",
+"input":"<!DOCTYPEa SYSTEM=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM>",
+"input":"<!DOCTYPEa SYSTEM>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM?",
+"input":"<!DOCTYPEa SYSTEM?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM@",
+"input":"<!DOCTYPEa SYSTEM@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEMA",
+"input":"<!DOCTYPEa SYSTEMA",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEMB",
+"input":"<!DOCTYPEa SYSTEMB",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEMY",
+"input":"<!DOCTYPEa SYSTEMY",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEMZ",
+"input":"<!DOCTYPEa SYSTEMZ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM`",
+"input":"<!DOCTYPEa SYSTEM`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEMa",
+"input":"<!DOCTYPEa SYSTEMa",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEMb",
+"input":"<!DOCTYPEa SYSTEMb",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEMy",
+"input":"<!DOCTYPEa SYSTEMy",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEMz",
+"input":"<!DOCTYPEa SYSTEMz",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM{",
+"input":"<!DOCTYPEa SYSTEM{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa SYSTEM\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa SYSTEM\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa Y",
+"input":"<!DOCTYPEa Y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa Z",
+"input":"<!DOCTYPEa Z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa `",
+"input":"<!DOCTYPEa `",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a",
+"input":"<!DOCTYPEa a",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a\\u0000",
+"input":"<!DOCTYPEa a\u0000",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a\\u0009",
+"input":"<!DOCTYPEa a\u0009",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a\\u000A",
+"input":"<!DOCTYPEa a\u000A",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a\\u000B",
+"input":"<!DOCTYPEa a\u000B",
+"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a\\u000C",
+"input":"<!DOCTYPEa a\u000C",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a ",
+"input":"<!DOCTYPEa a ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a!",
+"input":"<!DOCTYPEa a!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a\"",
+"input":"<!DOCTYPEa a\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a&",
+"input":"<!DOCTYPEa a&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a'",
+"input":"<!DOCTYPEa a'",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a-",
+"input":"<!DOCTYPEa a-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a/",
+"input":"<!DOCTYPEa a/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a0",
+"input":"<!DOCTYPEa a0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a1",
+"input":"<!DOCTYPEa a1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a9",
+"input":"<!DOCTYPEa a9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a<",
+"input":"<!DOCTYPEa a<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a=",
+"input":"<!DOCTYPEa a=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a>",
+"input":"<!DOCTYPEa a>",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a?",
+"input":"<!DOCTYPEa a?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a@",
+"input":"<!DOCTYPEa a@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa aA",
+"input":"<!DOCTYPEa aA",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa aB",
+"input":"<!DOCTYPEa aB",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa aY",
+"input":"<!DOCTYPEa aY",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa aZ",
+"input":"<!DOCTYPEa aZ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a`",
+"input":"<!DOCTYPEa a`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa aa",
+"input":"<!DOCTYPEa aa",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa ab",
+"input":"<!DOCTYPEa ab",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa ay",
+"input":"<!DOCTYPEa ay",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa az",
+"input":"<!DOCTYPEa az",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a{",
+"input":"<!DOCTYPEa a{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa a\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa a\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa b",
+"input":"<!DOCTYPEa b",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa y",
+"input":"<!DOCTYPEa y",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa z",
+"input":"<!DOCTYPEa z",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa {",
+"input":"<!DOCTYPEa {",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa \\uDBC0\\uDC00",
+"input":"<!DOCTYPEa \uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]},
+
+{"description":"<!DOCTYPEa!",
+"input":"<!DOCTYPEa!",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a!", null, null, false]]},
+
+{"description":"<!DOCTYPEa\"",
+"input":"<!DOCTYPEa\"",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a\"", null, null, false]]},
+
+{"description":"<!DOCTYPEa&",
+"input":"<!DOCTYPEa&",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a&", null, null, false]]},
+
+{"description":"<!DOCTYPEa'",
+"input":"<!DOCTYPEa'",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a'", null, null, false]]},
+
+{"description":"<!DOCTYPEa-",
+"input":"<!DOCTYPEa-",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a-", null, null, false]]},
+
+{"description":"<!DOCTYPEa/",
+"input":"<!DOCTYPEa/",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a/", null, null, false]]},
+
+{"description":"<!DOCTYPEa0",
+"input":"<!DOCTYPEa0",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a0", null, null, false]]},
+
+{"description":"<!DOCTYPEa1",
+"input":"<!DOCTYPEa1",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a1", null, null, false]]},
+
+{"description":"<!DOCTYPEa9",
+"input":"<!DOCTYPEa9",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a9", null, null, false]]},
+
+{"description":"<!DOCTYPEa<",
+"input":"<!DOCTYPEa<",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a<", null, null, false]]},
+
+{"description":"<!DOCTYPEa=",
+"input":"<!DOCTYPEa=",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a=", null, null, false]]},
+
+{"description":"<!DOCTYPEa>",
+"input":"<!DOCTYPEa>",
+"output":["ParseError", ["DOCTYPE", "a", null, null, true]]},
+
+{"description":"<!DOCTYPEa?",
+"input":"<!DOCTYPEa?",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a?", null, null, false]]},
+
+{"description":"<!DOCTYPEa@",
+"input":"<!DOCTYPEa@",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a@", null, null, false]]},
+
+{"description":"<!DOCTYPEaA",
+"input":"<!DOCTYPEaA",
+"output":["ParseError", "ParseError", ["DOCTYPE", "aa", null, null, false]]},
+
+{"description":"<!DOCTYPEaB",
+"input":"<!DOCTYPEaB",
+"output":["ParseError", "ParseError", ["DOCTYPE", "ab", null, null, false]]},
+
+{"description":"<!DOCTYPEaY",
+"input":"<!DOCTYPEaY",
+"output":["ParseError", "ParseError", ["DOCTYPE", "ay", null, null, false]]},
+
+{"description":"<!DOCTYPEaZ",
+"input":"<!DOCTYPEaZ",
+"output":["ParseError", "ParseError", ["DOCTYPE", "az", null, null, false]]},
+
+{"description":"<!DOCTYPEa[",
+"input":"<!DOCTYPEa[",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a[", null, null, false]]},
+
+{"description":"<!DOCTYPEa`",
+"input":"<!DOCTYPEa`",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a`", null, null, false]]},
+
+{"description":"<!DOCTYPEaa",
+"input":"<!DOCTYPEaa",
+"output":["ParseError", "ParseError", ["DOCTYPE", "aa", null, null, false]]},
+
+{"description":"<!DOCTYPEab",
+"input":"<!DOCTYPEab",
+"output":["ParseError", "ParseError", ["DOCTYPE", "ab", null, null, false]]},
+
+{"description":"<!DOCTYPEay",
+"input":"<!DOCTYPEay",
+"output":["ParseError", "ParseError", ["DOCTYPE", "ay", null, null, false]]},
+
+{"description":"<!DOCTYPEaz",
+"input":"<!DOCTYPEaz",
+"output":["ParseError", "ParseError", ["DOCTYPE", "az", null, null, false]]},
+
+{"description":"<!DOCTYPEa{",
+"input":"<!DOCTYPEa{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a{", null, null, false]]},
+
+{"description":"<!DOCTYPEa\\uDBC0\\uDC00",
+"input":"<!DOCTYPEa\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "a\uDBC0\uDC00", null, null, false]]},
+
+{"description":"<!DOCTYPEb",
+"input":"<!DOCTYPEb",
+"output":["ParseError", "ParseError", ["DOCTYPE", "b", null, null, false]]},
+
+{"description":"<!DOCTYPEy",
+"input":"<!DOCTYPEy",
+"output":["ParseError", "ParseError", ["DOCTYPE", "y", null, null, false]]},
+
+{"description":"<!DOCTYPEz",
+"input":"<!DOCTYPEz",
+"output":["ParseError", "ParseError", ["DOCTYPE", "z", null, null, false]]},
+
+{"description":"<!DOCTYPE{",
+"input":"<!DOCTYPE{",
+"output":["ParseError", "ParseError", ["DOCTYPE", "{", null, null, false]]},
+
+{"description":"<!DOCTYPE\\uDBC0\\uDC00",
+"input":"<!DOCTYPE\uDBC0\uDC00",
+"output":["ParseError", "ParseError", ["DOCTYPE", "\uDBC0\uDC00", null, null, false]]},
+
+{"description":"<!Y",
+"input":"<!Y",
+"output":["ParseError", ["Comment", "Y"]]},
+
+{"description":"<!Z",
+"input":"<!Z",
+"output":["ParseError", ["Comment", "Z"]]},
+
+{"description":"<!`",
+"input":"<!`",
+"output":["ParseError", ["Comment", "`"]]},
+
+{"description":"<!a",
+"input":"<!a",
+"output":["ParseError", ["Comment", "a"]]},
+
+{"description":"<!b",
+"input":"<!b",
+"output":["ParseError", ["Comment", "b"]]},
+
+{"description":"<!y",
+"input":"<!y",
+"output":["ParseError", ["Comment", "y"]]},
+
+{"description":"<!z",
+"input":"<!z",
+"output":["ParseError", ["Comment", "z"]]},
+
+{"description":"<!{",
+"input":"<!{",
+"output":["ParseError", ["Comment", "{"]]},
+
+{"description":"<!\\uDBC0\\uDC00",
+"input":"<!\uDBC0\uDC00",
+"output":["ParseError", ["Comment", "\uDBC0\uDC00"]]},
+
+{"description":"<\"",
+"input":"<\"",
+"output":["ParseError", ["Character", "<\""]]},
+
+{"description":"<&",
+"input":"<&",
+"output":["ParseError", ["Character", "<&"]]},
+
+{"description":"<'",
+"input":"<'",
+"output":["ParseError", ["Character", "<'"]]},
+
+{"description":"<-",
+"input":"<-",
+"output":["ParseError", ["Character", "<-"]]},
+
+{"description":"<.",
+"input":"<.",
+"output":["ParseError", ["Character", "<."]]},
+
+{"description":"</",
+"input":"</",
+"output":["ParseError", ["Character", "</"]]},
+
+{"description":"</\\u0000",
+"input":"</\u0000",
+"output":["ParseError", ["Comment", "\uFFFD"]]},
+
+{"description":"</\\u0009",
+"input":"</\u0009",
+"output":["ParseError", ["Comment", "\u0009"]]},
+
+{"description":"</\\u000A",
+"input":"</\u000A",
+"output":["ParseError", ["Comment", "\u000A"]]},
+
+{"description":"</\\u000B",
+"input":"</\u000B",
+"output":["ParseError", "ParseError", ["Comment", "\u000B"]]},
+
+{"description":"</\\u000C",
+"input":"</\u000C",
+"output":["ParseError", ["Comment", "\u000C"]]},
+
+{"description":"</ ",
+"input":"</ ",
+"output":["ParseError", ["Comment", " "]]},
+
+{"description":"</!",
+"input":"</!",
+"output":["ParseError", ["Comment", "!"]]},
+
+{"description":"</\"",
+"input":"</\"",
+"output":["ParseError", ["Comment", "\""]]},
+
+{"description":"</&",
+"input":"</&",
+"output":["ParseError", ["Comment", "&"]]},
+
+{"description":"</'",
+"input":"</'",
+"output":["ParseError", ["Comment", "'"]]},
+
+{"description":"</-",
+"input":"</-",
+"output":["ParseError", ["Comment", "-"]]},
+
+{"description":"<//",
+"input":"<//",
+"output":["ParseError", ["Comment", "/"]]},
+
+{"description":"</0",
+"input":"</0",
+"output":["ParseError", ["Comment", "0"]]},
+
+{"description":"</1",
+"input":"</1",
+"output":["ParseError", ["Comment", "1"]]},
+
+{"description":"</9",
+"input":"</9",
+"output":["ParseError", ["Comment", "9"]]},
+
+{"description":"</<",
+"input":"</<",
+"output":["ParseError", ["Comment", "<"]]},
+
+{"description":"</=",
+"input":"</=",
+"output":["ParseError", ["Comment", "="]]},
+
+{"description":"</>",
+"input":"</>",
+"output":["ParseError"]},
+
+{"description":"</?",
+"input":"</?",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"</@",
+"input":"</@",
+"output":["ParseError", ["Comment", "@"]]},
+
+{"description":"</A>",
+"input":"</A>",
+"output":[["EndTag", "a"]]},
+
+{"description":"</B>",
+"input":"</B>",
+"output":[["EndTag", "b"]]},
+
+{"description":"</Y>",
+"input":"</Y>",
+"output":[["EndTag", "y"]]},
+
+{"description":"</Z>",
+"input":"</Z>",
+"output":[["EndTag", "z"]]},
+
+{"description":"</[",
+"input":"</[",
+"output":["ParseError", ["Comment", "["]]},
+
+{"description":"</`",
+"input":"</`",
+"output":["ParseError", ["Comment", "`"]]},
+
+{"description":"</a>",
+"input":"</a>",
+"output":[["EndTag", "a"]]},
+
+{"description":"</b>",
+"input":"</b>",
+"output":[["EndTag", "b"]]},
+
+{"description":"</y>",
+"input":"</y>",
+"output":[["EndTag", "y"]]},
+
+{"description":"</z>",
+"input":"</z>",
+"output":[["EndTag", "z"]]},
+
+{"description":"</{",
+"input":"</{",
+"output":["ParseError", ["Comment", "{"]]},
+
+{"description":"</\\uDBC0\\uDC00",
+"input":"</\uDBC0\uDC00",
+"output":["ParseError", ["Comment", "\uDBC0\uDC00"]]},
+
+{"description":"<0",
+"input":"<0",
+"output":["ParseError", ["Character", "<0"]]},
+
+{"description":"<1",
+"input":"<1",
+"output":["ParseError", ["Character", "<1"]]},
+
+{"description":"<9",
+"input":"<9",
+"output":["ParseError", ["Character", "<9"]]},
+
+{"description":"<<",
+"input":"<<",
+"output":["ParseError", ["Character", "<"], "ParseError", ["Character", "<"]]},
+
+{"description":"<=",
+"input":"<=",
+"output":["ParseError", ["Character", "<="]]},
+
+{"description":"<>",
+"input":"<>",
+"output":["ParseError", ["Character", "<>"]]},
+
+{"description":"<?",
+"input":"<?",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"<?\\u0000",
+"input":"<?\u0000",
+"output":["ParseError", ["Comment", "?\uFFFD"]]},
+
+{"description":"<?\\u0009",
+"input":"<?\u0009",
+"output":["ParseError", ["Comment", "?\u0009"]]},
+
+{"description":"<?\\u000A",
+"input":"<?\u000A",
+"output":["ParseError", ["Comment", "?\u000A"]]},
+
+{"description":"<?\\u000B",
+"input":"<?\u000B",
+"output":["ParseError", "ParseError", ["Comment", "?\u000B"]]},
+
+{"description":"<?\\u000C",
+"input":"<?\u000C",
+"output":["ParseError", ["Comment", "?\u000C"]]},
+
+{"description":"<? ",
+"input":"<? ",
+"output":["ParseError", ["Comment", "? "]]},
+
+{"description":"<?!",
+"input":"<?!",
+"output":["ParseError", ["Comment", "?!"]]},
+
+{"description":"<?\"",
+"input":"<?\"",
+"output":["ParseError", ["Comment", "?\""]]},
+
+{"description":"<?&",
+"input":"<?&",
+"output":["ParseError", ["Comment", "?&"]]},
+
+{"description":"<?'",
+"input":"<?'",
+"output":["ParseError", ["Comment", "?'"]]},
+
+{"description":"<?-",
+"input":"<?-",
+"output":["ParseError", ["Comment", "?-"]]},
+
+{"description":"<?/",
+"input":"<?/",
+"output":["ParseError", ["Comment", "?/"]]},
+
+{"description":"<?0",
+"input":"<?0",
+"output":["ParseError", ["Comment", "?0"]]},
+
+{"description":"<?1",
+"input":"<?1",
+"output":["ParseError", ["Comment", "?1"]]},
+
+{"description":"<?9",
+"input":"<?9",
+"output":["ParseError", ["Comment", "?9"]]},
+
+{"description":"<?<",
+"input":"<?<",
+"output":["ParseError", ["Comment", "?<"]]},
+
+{"description":"<?=",
+"input":"<?=",
+"output":["ParseError", ["Comment", "?="]]},
+
+{"description":"<?>",
+"input":"<?>",
+"output":["ParseError", ["Comment", "?"]]},
+
+{"description":"<??",
+"input":"<??",
+"output":["ParseError", ["Comment", "??"]]},
+
+{"description":"<?@",
+"input":"<?@",
+"output":["ParseError", ["Comment", "?@"]]},
+
+{"description":"<?A",
+"input":"<?A",
+"output":["ParseError", ["Comment", "?A"]]},
+
+{"description":"<?B",
+"input":"<?B",
+"output":["ParseError", ["Comment", "?B"]]},
+
+{"description":"<?Y",
+"input":"<?Y",
+"output":["ParseError", ["Comment", "?Y"]]},
+
+{"description":"<?Z",
+"input":"<?Z",
+"output":["ParseError", ["Comment", "?Z"]]},
+
+{"description":"<?`",
+"input":"<?`",
+"output":["ParseError", ["Comment", "?`"]]},
+
+{"description":"<?a",
+"input":"<?a",
+"output":["ParseError", ["Comment", "?a"]]},
+
+{"description":"<?b",
+"input":"<?b",
+"output":["ParseError", ["Comment", "?b"]]},
+
+{"description":"<?y",
+"input":"<?y",
+"output":["ParseError", ["Comment", "?y"]]},
+
+{"description":"<?z",
+"input":"<?z",
+"output":["ParseError", ["Comment", "?z"]]},
+
+{"description":"<?{",
+"input":"<?{",
+"output":["ParseError", ["Comment", "?{"]]},
+
+{"description":"<?\\uDBC0\\uDC00",
+"input":"<?\uDBC0\uDC00",
+"output":["ParseError", ["Comment", "?\uDBC0\uDC00"]]},
+
+{"description":"<@",
+"input":"<@",
+"output":["ParseError", ["Character", "<@"]]},
+
+{"description":"<A>",
+"input":"<A>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<B>",
+"input":"<B>",
+"output":[["StartTag", "b", {}]]},
+
+{"description":"<Y>",
+"input":"<Y>",
+"output":[["StartTag", "y", {}]]},
+
+{"description":"<Z>",
+"input":"<Z>",
+"output":[["StartTag", "z", {}]]},
+
+{"description":"<[",
+"input":"<[",
+"output":["ParseError", ["Character", "<["]]},
+
+{"description":"<`",
+"input":"<`",
+"output":["ParseError", ["Character", "<`"]]},
+
+{"description":"<a>",
+"input":"<a>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u0000>",
+"input":"<a\u0000>",
+"output":["ParseError", ["StartTag", "a\uFFFD", {}]]},
+
+{"description":"<a\\u0008>",
+"input":"<a\u0008>",
+"output":["ParseError", ["StartTag", "a\u0008", {}]]},
+
+{"description":"<a\\u0009>",
+"input":"<a\u0009>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u000A>",
+"input":"<a\u000A>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u000B>",
+"input":"<a\u000B>",
+"output":["ParseError", ["StartTag", "a\u000B", {}]]},
+
+{"description":"<a\\u000C>",
+"input":"<a\u000C>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u000D>",
+"input":"<a\u000D>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a\\u001F>",
+"input":"<a\u001F>",
+"output":["ParseError", ["StartTag", "a\u001F", {}]]},
+
+{"description":"<a >",
+"input":"<a >",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u0000>",
+"input":"<a \u0000>",
+"output":["ParseError", ["StartTag", "a", {"\uFFFD":""}]]},
+
+{"description":"<a \\u0008>",
+"input":"<a \u0008>",
+"output":["ParseError", ["StartTag", "a", {"\u0008":""}]]},
+
+{"description":"<a \\u0009>",
+"input":"<a \u0009>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u000A>",
+"input":"<a \u000A>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u000B>",
+"input":"<a \u000B>",
+"output":["ParseError", ["StartTag", "a", {"\u000B":""}]]},
+
+{"description":"<a \\u000C>",
+"input":"<a \u000C>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u000D>",
+"input":"<a \u000D>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a \\u001F>",
+"input":"<a \u001F>",
+"output":["ParseError", ["StartTag", "a", {"\u001F":""}]]},
+
+{"description":"<a  >",
+"input":"<a  >",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a !>",
+"input":"<a !>",
+"output":[["StartTag", "a", {"!":""}]]},
+
+{"description":"<a \">",
+"input":"<a \">",
+"output":["ParseError", ["StartTag", "a", {"\"":""}]]},
+
+{"description":"<a #>",
+"input":"<a #>",
+"output":[["StartTag", "a", {"#":""}]]},
+
+{"description":"<a &>",
+"input":"<a &>",
+"output":[["StartTag", "a", {"&":""}]]},
+
+{"description":"<a '>",
+"input":"<a '>",
+"output":["ParseError", ["StartTag", "a", {"'":""}]]},
+
+{"description":"<a (>",
+"input":"<a (>",
+"output":[["StartTag", "a", {"(":""}]]},
+
+{"description":"<a ->",
+"input":"<a ->",
+"output":[["StartTag", "a", {"-":""}]]},
+
+{"description":"<a .>",
+"input":"<a .>",
+"output":[["StartTag", "a", {".":""}]]},
+
+{"description":"<a />",
+"input":"<a />",
+"output":[["StartTag", "a", {}, true]]},
+
+{"description":"<a 0>",
+"input":"<a 0>",
+"output":[["StartTag", "a", {"0":""}]]},
+
+{"description":"<a 1>",
+"input":"<a 1>",
+"output":[["StartTag", "a", {"1":""}]]},
+
+{"description":"<a 9>",
+"input":"<a 9>",
+"output":[["StartTag", "a", {"9":""}]]},
+
+{"description":"<a <>",
+"input":"<a <>",
+"output":["ParseError", ["StartTag", "a", {"<":""}]]},
+
+{"description":"<a =>",
+"input":"<a =>",
+"output":["ParseError", ["StartTag", "a", {"=":""}]]},
+
+{"description":"<a >",
+"input":"<a >",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a ?>",
+"input":"<a ?>",
+"output":[["StartTag", "a", {"?":""}]]},
+
+{"description":"<a @>",
+"input":"<a @>",
+"output":[["StartTag", "a", {"@":""}]]},
+
+{"description":"<a A>",
+"input":"<a A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a B>",
+"input":"<a B>",
+"output":[["StartTag", "a", {"b":""}]]},
+
+{"description":"<a Y>",
+"input":"<a Y>",
+"output":[["StartTag", "a", {"y":""}]]},
+
+{"description":"<a Z>",
+"input":"<a Z>",
+"output":[["StartTag", "a", {"z":""}]]},
+
+{"description":"<a [>",
+"input":"<a [>",
+"output":[["StartTag", "a", {"[":""}]]},
+
+{"description":"<a `>",
+"input":"<a `>",
+"output":[["StartTag", "a", {"`":""}]]},
+
+{"description":"<a a>",
+"input":"<a a>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u0000>",
+"input":"<a a\u0000>",
+"output":["ParseError", ["StartTag", "a", {"a\uFFFD":""}]]},
+
+{"description":"<a a\\u0008>",
+"input":"<a a\u0008>",
+"output":["ParseError", ["StartTag", "a", {"a\u0008":""}]]},
+
+{"description":"<a a\\u0009>",
+"input":"<a a\u0009>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u000A>",
+"input":"<a a\u000A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u000B>",
+"input":"<a a\u000B>",
+"output":["ParseError", ["StartTag", "a", {"a\u000B":""}]]},
+
+{"description":"<a a\\u000C>",
+"input":"<a a\u000C>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u000D>",
+"input":"<a a\u000D>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a\\u001F>",
+"input":"<a a\u001F>",
+"output":["ParseError", ["StartTag", "a", {"a\u001F":""}]]},
+
+{"description":"<a a >",
+"input":"<a a >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u0000>",
+"input":"<a a \u0000>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\uFFFD":""}]]},
+
+{"description":"<a a \\u0008>",
+"input":"<a a \u0008>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]]},
+
+{"description":"<a a \\u0009>",
+"input":"<a a \u0009>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u000A>",
+"input":"<a a \u000A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u000B>",
+"input":"<a a \u000B>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]]},
+
+{"description":"<a a \\u000C>",
+"input":"<a a \u000C>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u000D>",
+"input":"<a a \u000D>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a \\u001F>",
+"input":"<a a \u001F>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]]},
+
+{"description":"<a a  >",
+"input":"<a a  >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a !>",
+"input":"<a a !>",
+"output":[["StartTag", "a", {"a":"", "!":""}]]},
+
+{"description":"<a a \">",
+"input":"<a a \">",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\"":""}]]},
+
+{"description":"<a a #>",
+"input":"<a a #>",
+"output":[["StartTag", "a", {"a":"", "#":""}]]},
+
+{"description":"<a a &>",
+"input":"<a a &>",
+"output":[["StartTag", "a", {"a":"", "&":""}]]},
+
+{"description":"<a a '>",
+"input":"<a a '>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "'":""}]]},
+
+{"description":"<a a (>",
+"input":"<a a (>",
+"output":[["StartTag", "a", {"a":"", "(":""}]]},
+
+{"description":"<a a ->",
+"input":"<a a ->",
+"output":[["StartTag", "a", {"a":"", "-":""}]]},
+
+{"description":"<a a .>",
+"input":"<a a .>",
+"output":[["StartTag", "a", {"a":"", ".":""}]]},
+
+{"description":"<a a />",
+"input":"<a a />",
+"output":[["StartTag", "a", {"a":""}, true]]},
+
+{"description":"<a a 0>",
+"input":"<a a 0>",
+"output":[["StartTag", "a", {"a":"", "0":""}]]},
+
+{"description":"<a a 1>",
+"input":"<a a 1>",
+"output":[["StartTag", "a", {"a":"", "1":""}]]},
+
+{"description":"<a a 9>",
+"input":"<a a 9>",
+"output":[["StartTag", "a", {"a":"", "9":""}]]},
+
+{"description":"<a a <>",
+"input":"<a a <>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "<":""}]]},
+
+{"description":"<a a =>",
+"input":"<a a =>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a >",
+"input":"<a a >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a ?>",
+"input":"<a a ?>",
+"output":[["StartTag", "a", {"a":"", "?":""}]]},
+
+{"description":"<a a @>",
+"input":"<a a @>",
+"output":[["StartTag", "a", {"a":"", "@":""}]]},
+
+{"description":"<a a A>",
+"input":"<a a A>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a B>",
+"input":"<a a B>",
+"output":[["StartTag", "a", {"a":"", "b":""}]]},
+
+{"description":"<a a Y>",
+"input":"<a a Y>",
+"output":[["StartTag", "a", {"a":"", "y":""}]]},
+
+{"description":"<a a Z>",
+"input":"<a a Z>",
+"output":[["StartTag", "a", {"a":"", "z":""}]]},
+
+{"description":"<a a [>",
+"input":"<a a [>",
+"output":[["StartTag", "a", {"a":"", "[":""}]]},
+
+{"description":"<a a `>",
+"input":"<a a `>",
+"output":[["StartTag", "a", {"a":"", "`":""}]]},
+
+{"description":"<a a a>",
+"input":"<a a a>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a b>",
+"input":"<a a b>",
+"output":[["StartTag", "a", {"a":"", "b":""}]]},
+
+{"description":"<a a y>",
+"input":"<a a y>",
+"output":[["StartTag", "a", {"a":"", "y":""}]]},
+
+{"description":"<a a z>",
+"input":"<a a z>",
+"output":[["StartTag", "a", {"a":"", "z":""}]]},
+
+{"description":"<a a {>",
+"input":"<a a {>",
+"output":[["StartTag", "a", {"a":"", "{":""}]]},
+
+{"description":"<a a \\uDBC0\\uDC00>",
+"input":"<a a \uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a":"", "\uDBC0\uDC00":""}]]},
+
+{"description":"<a a!>",
+"input":"<a a!>",
+"output":[["StartTag", "a", {"a!":""}]]},
+
+{"description":"<a a\">",
+"input":"<a a\">",
+"output":["ParseError", ["StartTag", "a", {"a\"":""}]]},
+
+{"description":"<a a#>",
+"input":"<a a#>",
+"output":[["StartTag", "a", {"a#":""}]]},
+
+{"description":"<a a&>",
+"input":"<a a&>",
+"output":[["StartTag", "a", {"a&":""}]]},
+
+{"description":"<a a'>",
+"input":"<a a'>",
+"output":["ParseError", ["StartTag", "a", {"a'":""}]]},
+
+{"description":"<a a(>",
+"input":"<a a(>",
+"output":[["StartTag", "a", {"a(":""}]]},
+
+{"description":"<a a->",
+"input":"<a a->",
+"output":[["StartTag", "a", {"a-":""}]]},
+
+{"description":"<a a.>",
+"input":"<a a.>",
+"output":[["StartTag", "a", {"a.":""}]]},
+
+{"description":"<a a/>",
+"input":"<a a/>",
+"output":[["StartTag", "a", {"a":""}, true]]},
+
+{"description":"<a a0>",
+"input":"<a a0>",
+"output":[["StartTag", "a", {"a0":""}]]},
+
+{"description":"<a a1>",
+"input":"<a a1>",
+"output":[["StartTag", "a", {"a1":""}]]},
+
+{"description":"<a a9>",
+"input":"<a a9>",
+"output":[["StartTag", "a", {"a9":""}]]},
+
+{"description":"<a a<>",
+"input":"<a a<>",
+"output":["ParseError", ["StartTag", "a", {"a<":""}]]},
+
+{"description":"<a a=>",
+"input":"<a a=>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\\u0000>",
+"input":"<a a=\u0000>",
+"output":["ParseError", ["StartTag", "a", {"a":"\uFFFD"}]]},
+
+{"description":"<a a=\\u0008>",
+"input":"<a a=\u0008>",
+"output":["ParseError", ["StartTag", "a", {"a":"\u0008"}]]},
+
+{"description":"<a a=\\u0009>",
+"input":"<a a=\u0009>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\\u000A>",
+"input":"<a a=\u000A>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\\u000B>",
+"input":"<a a=\u000B>",
+"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]]},
+
+{"description":"<a a=\\u000C>",
+"input":"<a a=\u000C>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\\u000D>",
+"input":"<a a=\u000D>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\\u001F>",
+"input":"<a a=\u001F>",
+"output":["ParseError", ["StartTag", "a", {"a":"\u001F"}]]},
+
+{"description":"<a a= >",
+"input":"<a a= >",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=!>",
+"input":"<a a=!>",
+"output":[["StartTag", "a", {"a":"!"}]]},
+
+{"description":"<a a=\"\">",
+"input":"<a a=\"\">",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\"\\u0000\">",
+"input":"<a a=\"\u0000\">",
+"output":["ParseError", ["StartTag", "a", {"a":"\uFFFD"}]]},
+
+{"description":"<a a=\"\\u0009\">",
+"input":"<a a=\"\u0009\">",
+"output":[["StartTag", "a", {"a":"\u0009"}]]},
+
+{"description":"<a a=\"\\u000A\">",
+"input":"<a a=\"\u000A\">",
+"output":[["StartTag", "a", {"a":"\u000A"}]]},
+
+{"description":"<a a=\"\\u000B\">",
+"input":"<a a=\"\u000B\">",
+"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]]},
+
+{"description":"<a a=\"\\u000C\">",
+"input":"<a a=\"\u000C\">",
+"output":[["StartTag", "a", {"a":"\u000C"}]]},
+
+{"description":"<a a=\" \">",
+"input":"<a a=\" \">",
+"output":[["StartTag", "a", {"a":" "}]]},
+
+{"description":"<a a=\"!\">",
+"input":"<a a=\"!\">",
+"output":[["StartTag", "a", {"a":"!"}]]},
+
+{"description":"<a a=\"\">",
+"input":"<a a=\"\">",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=\"#\">",
+"input":"<a a=\"#\">",
+"output":[["StartTag", "a", {"a":"#"}]]},
+
+{"description":"<a a=\"%\">",
+"input":"<a a=\"%\">",
+"output":[["StartTag", "a", {"a":"%"}]]},
+
+{"description":"<a a=\"&\">",
+"input":"<a a=\"&\">",
+"output":[["StartTag", "a", {"a":"&"}]]},
+
+{"description":"<a a=\"'\">",
+"input":"<a a=\"'\">",
+"output":[["StartTag", "a", {"a":"'"}]]},
+
+{"description":"<a a=\"-\">",
+"input":"<a a=\"-\">",
+"output":[["StartTag", "a", {"a":"-"}]]},
+
+{"description":"<a a=\"/\">",
+"input":"<a a=\"/\">",
+"output":[["StartTag", "a", {"a":"/"}]]},
+
+{"description":"<a a=\"0\">",
+"input":"<a a=\"0\">",
+"output":[["StartTag", "a", {"a":"0"}]]},
+
+{"description":"<a a=\"1\">",
+"input":"<a a=\"1\">",
+"output":[["StartTag", "a", {"a":"1"}]]},
+
+{"description":"<a a=\"9\">",
+"input":"<a a=\"9\">",
+"output":[["StartTag", "a", {"a":"9"}]]},
+
+{"description":"<a a=\"<\">",
+"input":"<a a=\"<\">",
+"output":[["StartTag", "a", {"a":"<"}]]},
+
+{"description":"<a a=\"=\">",
+"input":"<a a=\"=\">",
+"output":[["StartTag", "a", {"a":"="}]]},
+
+{"description":"<a a=\">\">",
+"input":"<a a=\">\">",
+"output":[["StartTag", "a", {"a":">"}]]},
+
+{"description":"<a a=\"?\">",
+"input":"<a a=\"?\">",
+"output":[["StartTag", "a", {"a":"?"}]]},
+
+{"description":"<a a=\"@\">",
+"input":"<a a=\"@\">",
+"output":[["StartTag", "a", {"a":"@"}]]},
+
+{"description":"<a a=\"A\">",
+"input":"<a a=\"A\">",
+"output":[["StartTag", "a", {"a":"A"}]]},
+
+{"description":"<a a=\"B\">",
+"input":"<a a=\"B\">",
+"output":[["StartTag", "a", {"a":"B"}]]},
+
+{"description":"<a a=\"Y\">",
+"input":"<a a=\"Y\">",
+"output":[["StartTag", "a", {"a":"Y"}]]},
+
+{"description":"<a a=\"Z\">",
+"input":"<a a=\"Z\">",
+"output":[["StartTag", "a", {"a":"Z"}]]},
+
+{"description":"<a a=\"`\">",
+"input":"<a a=\"`\">",
+"output":[["StartTag", "a", {"a":"`"}]]},
+
+{"description":"<a a=\"a\">",
+"input":"<a a=\"a\">",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=\"b\">",
+"input":"<a a=\"b\">",
+"output":[["StartTag", "a", {"a":"b"}]]},
+
+{"description":"<a a=\"y\">",
+"input":"<a a=\"y\">",
+"output":[["StartTag", "a", {"a":"y"}]]},
+
+{"description":"<a a=\"z\">",
+"input":"<a a=\"z\">",
+"output":[["StartTag", "a", {"a":"z"}]]},
+
+{"description":"<a a=\"{\">",
+"input":"<a a=\"{\">",
+"output":[["StartTag", "a", {"a":"{"}]]},
+
+{"description":"<a a=\"\\uDBC0\\uDC00\">",
+"input":"<a a=\"\uDBC0\uDC00\">",
+"output":[["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
+
+{"description":"<a a=#>",
+"input":"<a a=#>",
+"output":[["StartTag", "a", {"a":"#"}]]},
+
+{"description":"<a a=%>",
+"input":"<a a=%>",
+"output":[["StartTag", "a", {"a":"%"}]]},
+
+{"description":"<a a=&>",
+"input":"<a a=&>",
+"output":[["StartTag", "a", {"a":"&"}]]},
+
+{"description":"<a a=''>",
+"input":"<a a=''>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a='\\u0000'>",
+"input":"<a a='\u0000'>",
+"output":["ParseError", ["StartTag", "a", {"a":"\uFFFD"}]]},
+
+{"description":"<a a='\\u0009'>",
+"input":"<a a='\u0009'>",
+"output":[["StartTag", "a", {"a":"\u0009"}]]},
+
+{"description":"<a a='\\u000A'>",
+"input":"<a a='\u000A'>",
+"output":[["StartTag", "a", {"a":"\u000A"}]]},
+
+{"description":"<a a='\\u000B'>",
+"input":"<a a='\u000B'>",
+"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]]},
+
+{"description":"<a a='\\u000C'>",
+"input":"<a a='\u000C'>",
+"output":[["StartTag", "a", {"a":"\u000C"}]]},
+
+{"description":"<a a=' '>",
+"input":"<a a=' '>",
+"output":[["StartTag", "a", {"a":" "}]]},
+
+{"description":"<a a='!'>",
+"input":"<a a='!'>",
+"output":[["StartTag", "a", {"a":"!"}]]},
+
+{"description":"<a a='\"'>",
+"input":"<a a='\"'>",
+"output":[["StartTag", "a", {"a":"\""}]]},
+
+{"description":"<a a='%'>",
+"input":"<a a='%'>",
+"output":[["StartTag", "a", {"a":"%"}]]},
+
+{"description":"<a a='&'>",
+"input":"<a a='&'>",
+"output":[["StartTag", "a", {"a":"&"}]]},
+
+{"description":"<a a=''>",
+"input":"<a a=''>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u0000>",
+"input":"<a a=''\u0000>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\uFFFD":""}]]},
+
+{"description":"<a a=''\\u0008>",
+"input":"<a a=''\u0008>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]]},
+
+{"description":"<a a=''\\u0009>",
+"input":"<a a=''\u0009>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u000A>",
+"input":"<a a=''\u000A>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u000B>",
+"input":"<a a=''\u000B>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]]},
+
+{"description":"<a a=''\\u000C>",
+"input":"<a a=''\u000C>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u000D>",
+"input":"<a a=''\u000D>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''\\u001F>",
+"input":"<a a=''\u001F>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]]},
+
+{"description":"<a a='' >",
+"input":"<a a='' >",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''!>",
+"input":"<a a=''!>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "!":""}]]},
+
+{"description":"<a a=''\">",
+"input":"<a a=''\">",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\"":""}]]},
+
+{"description":"<a a=''&>",
+"input":"<a a=''&>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "&":""}]]},
+
+{"description":"<a a='''>",
+"input":"<a a='''>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "'":""}]]},
+
+{"description":"<a a=''->",
+"input":"<a a=''->",
+"output":["ParseError", ["StartTag", "a", {"a":"", "-":""}]]},
+
+{"description":"<a a=''.>",
+"input":"<a a=''.>",
+"output":["ParseError", ["StartTag", "a", {"a":"", ".":""}]]},
+
+{"description":"<a a=''/>",
+"input":"<a a=''/>",
+"output":[["StartTag", "a", {"a":""}, true]]},
+
+{"description":"<a a=''0>",
+"input":"<a a=''0>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "0":""}]]},
+
+{"description":"<a a=''1>",
+"input":"<a a=''1>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "1":""}]]},
+
+{"description":"<a a=''9>",
+"input":"<a a=''9>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "9":""}]]},
+
+{"description":"<a a=''<>",
+"input":"<a a=''<>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "<":""}]]},
+
+{"description":"<a a=''=>",
+"input":"<a a=''=>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "=":""}]]},
+
+{"description":"<a a=''>",
+"input":"<a a=''>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''?>",
+"input":"<a a=''?>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "?":""}]]},
+
+{"description":"<a a=''@>",
+"input":"<a a=''@>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "@":""}]]},
+
+{"description":"<a a=''A>",
+"input":"<a a=''A>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''B>",
+"input":"<a a=''B>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "b":""}]]},
+
+{"description":"<a a=''Y>",
+"input":"<a a=''Y>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "y":""}]]},
+
+{"description":"<a a=''Z>",
+"input":"<a a=''Z>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "z":""}]]},
+
+{"description":"<a a=''`>",
+"input":"<a a=''`>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "`":""}]]},
+
+{"description":"<a a=''a>",
+"input":"<a a=''a>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=''b>",
+"input":"<a a=''b>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "b":""}]]},
+
+{"description":"<a a=''y>",
+"input":"<a a=''y>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "y":""}]]},
+
+{"description":"<a a=''z>",
+"input":"<a a=''z>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "z":""}]]},
+
+{"description":"<a a=''{>",
+"input":"<a a=''{>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "{":""}]]},
+
+{"description":"<a a=''\\uDBC0\\uDC00>",
+"input":"<a a=''\uDBC0\uDC00>",
+"output":["ParseError", ["StartTag", "a", {"a":"", "\uDBC0\uDC00":""}]]},
+
+{"description":"<a a='('>",
+"input":"<a a='('>",
+"output":[["StartTag", "a", {"a":"("}]]},
+
+{"description":"<a a='-'>",
+"input":"<a a='-'>",
+"output":[["StartTag", "a", {"a":"-"}]]},
+
+{"description":"<a a='/'>",
+"input":"<a a='/'>",
+"output":[["StartTag", "a", {"a":"/"}]]},
+
+{"description":"<a a='0'>",
+"input":"<a a='0'>",
+"output":[["StartTag", "a", {"a":"0"}]]},
+
+{"description":"<a a='1'>",
+"input":"<a a='1'>",
+"output":[["StartTag", "a", {"a":"1"}]]},
+
+{"description":"<a a='9'>",
+"input":"<a a='9'>",
+"output":[["StartTag", "a", {"a":"9"}]]},
+
+{"description":"<a a='<'>",
+"input":"<a a='<'>",
+"output":[["StartTag", "a", {"a":"<"}]]},
+
+{"description":"<a a='='>",
+"input":"<a a='='>",
+"output":[["StartTag", "a", {"a":"="}]]},
+
+{"description":"<a a='>'>",
+"input":"<a a='>'>",
+"output":[["StartTag", "a", {"a":">"}]]},
+
+{"description":"<a a='?'>",
+"input":"<a a='?'>",
+"output":[["StartTag", "a", {"a":"?"}]]},
+
+{"description":"<a a='@'>",
+"input":"<a a='@'>",
+"output":[["StartTag", "a", {"a":"@"}]]},
+
+{"description":"<a a='A'>",
+"input":"<a a='A'>",
+"output":[["StartTag", "a", {"a":"A"}]]},
+
+{"description":"<a a='B'>",
+"input":"<a a='B'>",
+"output":[["StartTag", "a", {"a":"B"}]]},
+
+{"description":"<a a='Y'>",
+"input":"<a a='Y'>",
+"output":[["StartTag", "a", {"a":"Y"}]]},
+
+{"description":"<a a='Z'>",
+"input":"<a a='Z'>",
+"output":[["StartTag", "a", {"a":"Z"}]]},
+
+{"description":"<a a='`'>",
+"input":"<a a='`'>",
+"output":[["StartTag", "a", {"a":"`"}]]},
+
+{"description":"<a a='a'>",
+"input":"<a a='a'>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a='b'>",
+"input":"<a a='b'>",
+"output":[["StartTag", "a", {"a":"b"}]]},
+
+{"description":"<a a='y'>",
+"input":"<a a='y'>",
+"output":[["StartTag", "a", {"a":"y"}]]},
+
+{"description":"<a a='z'>",
+"input":"<a a='z'>",
+"output":[["StartTag", "a", {"a":"z"}]]},
+
+{"description":"<a a='{'>",
+"input":"<a a='{'>",
+"output":[["StartTag", "a", {"a":"{"}]]},
+
+{"description":"<a a='\\uDBC0\\uDC00'>",
+"input":"<a a='\uDBC0\uDC00'>",
+"output":[["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
+
+{"description":"<a a=(>",
+"input":"<a a=(>",
+"output":[["StartTag", "a", {"a":"("}]]},
+
+{"description":"<a a=->",
+"input":"<a a=->",
+"output":[["StartTag", "a", {"a":"-"}]]},
+
+{"description":"<a a=/>",
+"input":"<a a=/>",
+"output":[["StartTag", "a", {"a":"/"}]]},
+
+{"description":"<a a=0>",
+"input":"<a a=0>",
+"output":[["StartTag", "a", {"a":"0"}]]},
+
+{"description":"<a a=1>",
+"input":"<a a=1>",
+"output":[["StartTag", "a", {"a":"1"}]]},
+
+{"description":"<a a=9>",
+"input":"<a a=9>",
+"output":[["StartTag", "a", {"a":"9"}]]},
+
+{"description":"<a a=<>",
+"input":"<a a=<>",
+"output":["ParseError", ["StartTag", "a", {"a":"<"}]]},
+
+{"description":"<a a==>",
+"input":"<a a==>",
+"output":["ParseError", ["StartTag", "a", {"a":"="}]]},
+
+{"description":"<a a=>",
+"input":"<a a=>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a=?>",
+"input":"<a a=?>",
+"output":[["StartTag", "a", {"a":"?"}]]},
+
+{"description":"<a a=@>",
+"input":"<a a=@>",
+"output":[["StartTag", "a", {"a":"@"}]]},
+
+{"description":"<a a=A>",
+"input":"<a a=A>",
+"output":[["StartTag", "a", {"a":"A"}]]},
+
+{"description":"<a a=B>",
+"input":"<a a=B>",
+"output":[["StartTag", "a", {"a":"B"}]]},
+
+{"description":"<a a=Y>",
+"input":"<a a=Y>",
+"output":[["StartTag", "a", {"a":"Y"}]]},
+
+{"description":"<a a=Z>",
+"input":"<a a=Z>",
+"output":[["StartTag", "a", {"a":"Z"}]]},
+
+{"description":"<a a=`>",
+"input":"<a a=`>",
+"output":["ParseError", ["StartTag", "a", {"a":"`"}]]},
+
+{"description":"<a a=a>",
+"input":"<a a=a>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u0000>",
+"input":"<a a=a\u0000>",
+"output":["ParseError", ["StartTag", "a", {"a":"a\uFFFD"}]]},
+
+{"description":"<a a=a\\u0008>",
+"input":"<a a=a\u0008>",
+"output":["ParseError", ["StartTag", "a", {"a":"a\u0008"}]]},
+
+{"description":"<a a=a\\u0009>",
+"input":"<a a=a\u0009>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u000A>",
+"input":"<a a=a\u000A>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u000B>",
+"input":"<a a=a\u000B>",
+"output":["ParseError", ["StartTag", "a", {"a":"a\u000B"}]]},
+
+{"description":"<a a=a\\u000C>",
+"input":"<a a=a\u000C>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u000D>",
+"input":"<a a=a\u000D>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a\\u001F>",
+"input":"<a a=a\u001F>",
+"output":["ParseError", ["StartTag", "a", {"a":"a\u001F"}]]},
+
+{"description":"<a a=a >",
+"input":"<a a=a >",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a!>",
+"input":"<a a=a!>",
+"output":[["StartTag", "a", {"a":"a!"}]]},
+
+{"description":"<a a=a\">",
+"input":"<a a=a\">",
+"output":["ParseError", ["StartTag", "a", {"a":"a\""}]]},
+
+{"description":"<a a=a#>",
+"input":"<a a=a#>",
+"output":[["StartTag", "a", {"a":"a#"}]]},
+
+{"description":"<a a=a%>",
+"input":"<a a=a%>",
+"output":[["StartTag", "a", {"a":"a%"}]]},
+
+{"description":"<a a=a&>",
+"input":"<a a=a&>",
+"output":[["StartTag", "a", {"a":"a&"}]]},
+
+{"description":"<a a=a'>",
+"input":"<a a=a'>",
+"output":["ParseError", ["StartTag", "a", {"a":"a'"}]]},
+
+{"description":"<a a=a(>",
+"input":"<a a=a(>",
+"output":[["StartTag", "a", {"a":"a("}]]},
+
+{"description":"<a a=a->",
+"input":"<a a=a->",
+"output":[["StartTag", "a", {"a":"a-"}]]},
+
+{"description":"<a a=a/>",
+"input":"<a a=a/>",
+"output":[["StartTag", "a", {"a":"a/"}]]},
+
+{"description":"<a a=a0>",
+"input":"<a a=a0>",
+"output":[["StartTag", "a", {"a":"a0"}]]},
+
+{"description":"<a a=a1>",
+"input":"<a a=a1>",
+"output":[["StartTag", "a", {"a":"a1"}]]},
+
+{"description":"<a a=a9>",
+"input":"<a a=a9>",
+"output":[["StartTag", "a", {"a":"a9"}]]},
+
+{"description":"<a a=a<>",
+"input":"<a a=a<>",
+"output":["ParseError", ["StartTag", "a", {"a":"a<"}]]},
+
+{"description":"<a a=a=>",
+"input":"<a a=a=>",
+"output":["ParseError", ["StartTag", "a", {"a":"a="}]]},
+
+{"description":"<a a=a>",
+"input":"<a a=a>",
+"output":[["StartTag", "a", {"a":"a"}]]},
+
+{"description":"<a a=a?>",
+"input":"<a a=a?>",
+"output":[["StartTag", "a", {"a":"a?"}]]},
+
+{"description":"<a a=a@>",
+"input":"<a a=a@>",
+"output":[["StartTag", "a", {"a":"a@"}]]},
+
+{"description":"<a a=aA>",
+"input":"<a a=aA>",
+"output":[["StartTag", "a", {"a":"aA"}]]},
+
+{"description":"<a a=aB>",
+"input":"<a a=aB>",
+"output":[["StartTag", "a", {"a":"aB"}]]},
+
+{"description":"<a a=aY>",
+"input":"<a a=aY>",
+"output":[["StartTag", "a", {"a":"aY"}]]},
+
+{"description":"<a a=aZ>",
+"input":"<a a=aZ>",
+"output":[["StartTag", "a", {"a":"aZ"}]]},
+
+{"description":"<a a=a`>",
+"input":"<a a=a`>",
+"output":["ParseError", ["StartTag", "a", {"a":"a`"}]]},
+
+{"description":"<a a=aa>",
+"input":"<a a=aa>",
+"output":[["StartTag", "a", {"a":"aa"}]]},
+
+{"description":"<a a=ab>",
+"input":"<a a=ab>",
+"output":[["StartTag", "a", {"a":"ab"}]]},
+
+{"description":"<a a=ay>",
+"input":"<a a=ay>",
+"output":[["StartTag", "a", {"a":"ay"}]]},
+
+{"description":"<a a=az>",
+"input":"<a a=az>",
+"output":[["StartTag", "a", {"a":"az"}]]},
+
+{"description":"<a a=a{>",
+"input":"<a a=a{>",
+"output":[["StartTag", "a", {"a":"a{"}]]},
+
+{"description":"<a a=a\\uDBC0\\uDC00>",
+"input":"<a a=a\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a":"a\uDBC0\uDC00"}]]},
+
+{"description":"<a a=b>",
+"input":"<a a=b>",
+"output":[["StartTag", "a", {"a":"b"}]]},
+
+{"description":"<a a=y>",
+"input":"<a a=y>",
+"output":[["StartTag", "a", {"a":"y"}]]},
+
+{"description":"<a a=z>",
+"input":"<a a=z>",
+"output":[["StartTag", "a", {"a":"z"}]]},
+
+{"description":"<a a={>",
+"input":"<a a={>",
+"output":[["StartTag", "a", {"a":"{"}]]},
+
+{"description":"<a a=\\uDBC0\\uDC00>",
+"input":"<a a=\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a":"\uDBC0\uDC00"}]]},
+
+{"description":"<a a>",
+"input":"<a a>",
+"output":[["StartTag", "a", {"a":""}]]},
+
+{"description":"<a a?>",
+"input":"<a a?>",
+"output":[["StartTag", "a", {"a?":""}]]},
+
+{"description":"<a a@>",
+"input":"<a a@>",
+"output":[["StartTag", "a", {"a@":""}]]},
+
+{"description":"<a aA>",
+"input":"<a aA>",
+"output":[["StartTag", "a", {"aa":""}]]},
+
+{"description":"<a aB>",
+"input":"<a aB>",
+"output":[["StartTag", "a", {"ab":""}]]},
+
+{"description":"<a aY>",
+"input":"<a aY>",
+"output":[["StartTag", "a", {"ay":""}]]},
+
+{"description":"<a aZ>",
+"input":"<a aZ>",
+"output":[["StartTag", "a", {"az":""}]]},
+
+{"description":"<a a[>",
+"input":"<a a[>",
+"output":[["StartTag", "a", {"a[":""}]]},
+
+{"description":"<a a`>",
+"input":"<a a`>",
+"output":[["StartTag", "a", {"a`":""}]]},
+
+{"description":"<a aa>",
+"input":"<a aa>",
+"output":[["StartTag", "a", {"aa":""}]]},
+
+{"description":"<a ab>",
+"input":"<a ab>",
+"output":[["StartTag", "a", {"ab":""}]]},
+
+{"description":"<a ay>",
+"input":"<a ay>",
+"output":[["StartTag", "a", {"ay":""}]]},
+
+{"description":"<a az>",
+"input":"<a az>",
+"output":[["StartTag", "a", {"az":""}]]},
+
+{"description":"<a a{>",
+"input":"<a a{>",
+"output":[["StartTag", "a", {"a{":""}]]},
+
+{"description":"<a a\\uDBC0\\uDC00>",
+"input":"<a a\uDBC0\uDC00>",
+"output":[["StartTag", "a", {"a\uDBC0\uDC00":""}]]},
+
+{"description":"<a b>",
+"input":"<a b>",
+"output":[["StartTag", "a", {"b":""}]]},
+
+{"description":"<a y>",
+"input":"<a y>",
+"output":[["StartTag", "a", {"y":""}]]},
+
+{"description":"<a z>",
+"input":"<a z>",
+"output":[["StartTag", "a", {"z":""}]]},
+
+{"description":"<a {>",
+"input":"<a {>",
+"output":[["StartTag", "a", {"{":""}]]},
+
+{"description":"<a \\uDBC0\\uDC00>",
+"input":"<a \uDBC0\uDC00>",
+"output":[["StartTag", "a", {"\uDBC0\uDC00":""}]]},
+
+{"description":"<a!>",
+"input":"<a!>",
+"output":[["StartTag", "a!", {}]]},
+
+{"description":"<a\">",
+"input":"<a\">",
+"output":[["StartTag", "a\"", {}]]},
+
+{"description":"<a&>",
+"input":"<a&>",
+"output":[["StartTag", "a&", {}]]},
+
+{"description":"<a'>",
+"input":"<a'>",
+"output":[["StartTag", "a'", {}]]},
+
+{"description":"<a->",
+"input":"<a->",
+"output":[["StartTag", "a-", {}]]},
+
+{"description":"<a.>",
+"input":"<a.>",
+"output":[["StartTag", "a.", {}]]},
+
+{"description":"<a/>",
+"input":"<a/>",
+"output":[["StartTag", "a", {}, true]]},
+
+{"description":"<a/\\u0000>",
+"input":"<a/\u0000>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"\uFFFD":""}]]},
+
+{"description":"<a/\\u0009>",
+"input":"<a/\u0009>",
+"output":["ParseError", ["StartTag", "a", {}]]},
+
+{"description":"<a/\\u000A>",
+"input":"<a/\u000A>",
+"output":["ParseError", ["StartTag", "a", {}]]},
+
+{"description":"<a/\\u000B>",
+"input":"<a/\u000B>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"\u000B":""}]]},
+
+{"description":"<a/\\u000C>",
+"input":"<a/\u000C>",
+"output":["ParseError", ["StartTag", "a", {}]]},
+
+{"description":"<a/ >",
+"input":"<a/ >",
+"output":["ParseError", ["StartTag", "a", {}]]},
+
+{"description":"<a/!>",
+"input":"<a/!>",
+"output":["ParseError", ["StartTag", "a", {"!":""}]]},
+
+{"description":"<a/\">",
+"input":"<a/\">",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"\"":""}]]},
+
+{"description":"<a/&>",
+"input":"<a/&>",
+"output":["ParseError", ["StartTag", "a", {"&":""}]]},
+
+{"description":"<a/'>",
+"input":"<a/'>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"'":""}]]},
+
+{"description":"<a/->",
+"input":"<a/->",
+"output":["ParseError", ["StartTag", "a", {"-":""}]]},
+
+{"description":"<a//>",
+"input":"<a//>",
+"output":["ParseError", ["StartTag", "a", {}, true]]},
+
+{"description":"<a/0>",
+"input":"<a/0>",
+"output":["ParseError", ["StartTag", "a", {"0":""}]]},
+
+{"description":"<a/1>",
+"input":"<a/1>",
+"output":["ParseError", ["StartTag", "a", {"1":""}]]},
+
+{"description":"<a/9>",
+"input":"<a/9>",
+"output":["ParseError", ["StartTag", "a", {"9":""}]]},
+
+{"description":"<a/<>",
+"input":"<a/<>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"<":""}]]},
+
+{"description":"<a/=>",
+"input":"<a/=>",
+"output":["ParseError", "ParseError", ["StartTag", "a", {"=":""}]]},
+
+{"description":"<a/>",
+"input":"<a/>",
+"output":[["StartTag", "a", {}, true]]},
+
+{"description":"<a/?>",
+"input":"<a/?>",
+"output":["ParseError", ["StartTag", "a", {"?":""}]]},
+
+{"description":"<a/@>",
+"input":"<a/@>",
+"output":["ParseError", ["StartTag", "a", {"@":""}]]},
+
+{"description":"<a/A>",
+"input":"<a/A>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a/B>",
+"input":"<a/B>",
+"output":["ParseError", ["StartTag", "a", {"b":""}]]},
+
+{"description":"<a/Y>",
+"input":"<a/Y>",
+"output":["ParseError", ["StartTag", "a", {"y":""}]]},
+
+{"description":"<a/Z>",
+"input":"<a/Z>",
+"output":["ParseError", ["StartTag", "a", {"z":""}]]},
+
+{"description":"<a/`>",
+"input":"<a/`>",
+"output":["ParseError", ["StartTag", "a", {"`":""}]]},
+
+{"description":"<a/a>",
+"input":"<a/a>",
+"output":["ParseError", ["StartTag", "a", {"a":""}]]},
+
+{"description":"<a/b>",
+"input":"<a/b>",
+"output":["ParseError", ["StartTag", "a", {"b":""}]]},
+
+{"description":"<a/y>",
+"input":"<a/y>",
+"output":["ParseError", ["StartTag", "a", {"y":""}]]},
+
+{"description":"<a/z>",
+"input":"<a/z>",
+"output":["ParseError", ["StartTag", "a", {"z":""}]]},
+
+{"description":"<a/{>",
+"input":"<a/{>",
+"output":["ParseError", ["StartTag", "a", {"{":""}]]},
+
+{"description":"<a/\\uDBC0\\uDC00>",
+"input":"<a/\uDBC0\uDC00>",
+"output":["ParseError", ["StartTag", "a", {"\uDBC0\uDC00":""}]]},
+
+{"description":"<a0>",
+"input":"<a0>",
+"output":[["StartTag", "a0", {}]]},
+
+{"description":"<a1>",
+"input":"<a1>",
+"output":[["StartTag", "a1", {}]]},
+
+{"description":"<a9>",
+"input":"<a9>",
+"output":[["StartTag", "a9", {}]]},
+
+{"description":"<a<>",
+"input":"<a<>",
+"output":[["StartTag", "a<", {}]]},
+
+{"description":"<a=>",
+"input":"<a=>",
+"output":[["StartTag", "a=", {}]]},
+
+{"description":"<a>",
+"input":"<a>",
+"output":[["StartTag", "a", {}]]},
+
+{"description":"<a?>",
+"input":"<a?>",
+"output":[["StartTag", "a?", {}]]},
+
+{"description":"<a@>",
+"input":"<a@>",
+"output":[["StartTag", "a@", {}]]},
+
+{"description":"<aA>",
+"input":"<aA>",
+"output":[["StartTag", "aa", {}]]},
+
+{"description":"<aB>",
+"input":"<aB>",
+"output":[["StartTag", "ab", {}]]},
+
+{"description":"<aY>",
+"input":"<aY>",
+"output":[["StartTag", "ay", {}]]},
+
+{"description":"<aZ>",
+"input":"<aZ>",
+"output":[["StartTag", "az", {}]]},
+
+{"description":"<a[>",
+"input":"<a[>",
+"output":[["StartTag", "a[", {}]]},
+
+{"description":"<a`>",
+"input":"<a`>",
+"output":[["StartTag", "a`", {}]]},
+
+{"description":"<aa>",
+"input":"<aa>",
+"output":[["StartTag", "aa", {}]]},
+
+{"description":"<ab>",
+"input":"<ab>",
+"output":[["StartTag", "ab", {}]]},
+
+{"description":"<ay>",
+"input":"<ay>",
+"output":[["StartTag", "ay", {}]]},
+
+{"description":"<az>",
+"input":"<az>",
+"output":[["StartTag", "az", {}]]},
+
+{"description":"<a{>",
+"input":"<a{>",
+"output":[["StartTag", "a{", {}]]},
+
+{"description":"<a\\uDBC0\\uDC00>",
+"input":"<a\uDBC0\uDC00>",
+"output":[["StartTag", "a\uDBC0\uDC00", {}]]},
+
+{"description":"<b>",
+"input":"<b>",
+"output":[["StartTag", "b", {}]]},
+
+{"description":"<y>",
+"input":"<y>",
+"output":[["StartTag", "y", {}]]},
+
+{"description":"<z>",
+"input":"<z>",
+"output":[["StartTag", "z", {}]]},
+
+{"description":"<{",
+"input":"<{",
+"output":["ParseError", ["Character", "<{"]]},
+
+{"description":"<\\uDBC0\\uDC00",
+"input":"<\uDBC0\uDC00",
+"output":["ParseError", ["Character", "<\uDBC0\uDC00"]]},
+
+{"description":"=",
+"input":"=",
+"output":[["Character", "="]]},
+
+{"description":">",
+"input":">",
+"output":[["Character", ">"]]},
+
+{"description":"?",
+"input":"?",
+"output":[["Character", "?"]]},
+
+{"description":"@",
+"input":"@",
+"output":[["Character", "@"]]},
+
+{"description":"A",
+"input":"A",
+"output":[["Character", "A"]]},
+
+{"description":"B",
+"input":"B",
+"output":[["Character", "B"]]},
+
+{"description":"Y",
+"input":"Y",
+"output":[["Character", "Y"]]},
+
+{"description":"Z",
+"input":"Z",
+"output":[["Character", "Z"]]},
+
+{"description":"`",
+"input":"`",
+"output":[["Character", "`"]]},
+
+{"description":"a",
+"input":"a",
+"output":[["Character", "a"]]},
+
+{"description":"b",
+"input":"b",
+"output":[["Character", "b"]]},
+
+{"description":"y",
+"input":"y",
+"output":[["Character", "y"]]},
+
+{"description":"z",
+"input":"z",
+"output":[["Character", "z"]]},
+
+{"description":"{",
+"input":"{",
+"output":[["Character", "{"]]},
+
+{"description":"\\uDBC0\\uDC00",
+"input":"\uDBC0\uDC00",
+"output":[["Character", "\uDBC0\uDC00"]]}
+
+]}
diff --git a/html5lib/tests/testdata/tokenizer/test4.test b/html5lib/tests/testdata/tokenizer/test4.test
new file mode 100644
index 00000000..80f859e2
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/test4.test
@@ -0,0 +1,344 @@
+{"tests": [
+
+{"description":"< in attribute name",
+"input":"<z/0  <>",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]]},
+
+{"description":"< in attribute value",
+"input":"<z x=<>",
+"output":["ParseError", ["StartTag", "z", {"x": "<"}]]},
+
+{"description":"= in unquoted attribute value",
+"input":"<z z=z=z>",
+"output":["ParseError", ["StartTag", "z", {"z": "z=z"}]]},
+
+{"description":"= attribute",
+"input":"<z =>",
+"output":["ParseError", ["StartTag", "z", {"=": ""}]]},
+
+{"description":"== attribute",
+"input":"<z ==>",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"=": ""}]]},
+
+{"description":"=== attribute",
+"input":"<z ===>",
+"output":["ParseError", "ParseError", ["StartTag", "z", {"=": "="}]]},
+
+{"description":"==== attribute",
+"input":"<z ====>",
+"output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]]},
+
+{"description":"Allowed \" after ampersand in attribute value",
+"input":"<z z=\"&\">",
+"output":[["StartTag", "z", {"z": "&"}]]},
+
+{"description":"Non-allowed ' after ampersand in attribute value",
+"input":"<z z=\"&'\">",
+"output":["ParseError", ["StartTag", "z", {"z": "&'"}]]},
+
+{"description":"Allowed ' after ampersand in attribute value",
+"input":"<z z='&'>",
+"output":[["StartTag", "z", {"z": "&"}]]},
+
+{"description":"Non-allowed \" after ampersand in attribute value",
+"input":"<z z='&\"'>",
+"output":["ParseError", ["StartTag", "z", {"z": "&\""}]]},
+
+{"description":"Text after bogus character reference",
+"input":"<z z='&xlink_xmlns;'>bar<z>",
+"output":["ParseError",["StartTag","z",{"z":"&xlink_xmlns;"}],["Character","bar"],["StartTag","z",{}]]},
+
+{"description":"Text after hex character reference",
+"input":"<z z='&#x0020; foo'>bar<z>",
+"output":[["StartTag","z",{"z":"  foo"}],["Character","bar"],["StartTag","z",{}]]},
+
+{"description":"Attribute name starting with \"",
+"input":"<foo \"='bar'>",
+"output":["ParseError", ["StartTag", "foo", {"\"": "bar"}]]},
+
+{"description":"Attribute name starting with '",
+"input":"<foo '='bar'>",
+"output":["ParseError", ["StartTag", "foo", {"'": "bar"}]]},
+
+{"description":"Attribute name containing \"",
+"input":"<foo a\"b='bar'>",
+"output":["ParseError", ["StartTag", "foo", {"a\"b": "bar"}]]},
+
+{"description":"Attribute name containing '",
+"input":"<foo a'b='bar'>",
+"output":["ParseError", ["StartTag", "foo", {"a'b": "bar"}]]},
+
+{"description":"Unquoted attribute value containing '",
+"input":"<foo a=b'c>",
+"output":["ParseError", ["StartTag", "foo", {"a": "b'c"}]]},
+
+{"description":"Unquoted attribute value containing \"",
+"input":"<foo a=b\"c>",
+"output":["ParseError", ["StartTag", "foo", {"a": "b\"c"}]]},
+
+{"description":"Double-quoted attribute value not followed by whitespace",
+"input":"<foo a=\"b\"c>",
+"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
+
+{"description":"Single-quoted attribute value not followed by whitespace",
+"input":"<foo a='b'c>",
+"output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]]},
+
+{"description":"Quoted attribute followed by permitted /",
+"input":"<br a='b'/>",
+"output":[["StartTag","br",{"a":"b"},true]]},
+
+{"description":"Quoted attribute followed by non-permitted /",
+"input":"<bar a='b'/>",
+"output":[["StartTag","bar",{"a":"b"},true]]},
+
+{"description":"CR EOF after doctype name",
+"input":"<!doctype html \r",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"CR EOF in tag name",
+"input":"<z\r",
+"output":["ParseError"]},
+
+{"description":"Slash EOF in tag name",
+"input":"<z/",
+"output":["ParseError"]},
+
+{"description":"Zero hex numeric entity",
+"input":"&#x0",
+"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Zero decimal numeric entity",
+"input":"&#0",
+"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Zero-prefixed hex numeric entity",
+"input":"&#x000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000041;",
+"output":[["Character", "A"]]},
+
+{"description":"Zero-prefixed decimal numeric entity",
+"input":"&#000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000065;",
+"output":[["Character", "A"]]},
+
+{"description":"Empty hex numeric entities",
+"input":"&#x &#X ",
+"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]},
+
+{"description":"Empty decimal numeric entities",
+"input":"&# &#; ",
+"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]},
+
+{"description":"Non-BMP numeric entity",
+"input":"&#x10000;",
+"output":[["Character", "\uD800\uDC00"]]},
+
+{"description":"Maximum non-BMP numeric entity",
+"input":"&#X10FFFF;",
+"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]},
+
+{"description":"Above maximum numeric entity",
+"input":"&#x110000;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"32-bit hex numeric entity",
+"input":"&#x80000041;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"33-bit hex numeric entity",
+"input":"&#x100000041;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"33-bit decimal numeric entity",
+"input":"&#4294967361;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"65-bit hex numeric entity",
+"input":"&#x10000000000000041;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"65-bit decimal numeric entity",
+"input":"&#18446744073709551681;",
+"output":["ParseError", ["Character", "\uFFFD"]]},
+
+{"description":"Surrogate code point edge cases",
+"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
+"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]},
+
+{"description":"Uppercase start tag name",
+"input":"<X>",
+"output":[["StartTag", "x", {}]]},
+
+{"description":"Uppercase end tag name",
+"input":"</X>",
+"output":[["EndTag", "x"]]},
+
+{"description":"Uppercase attribute name",
+"input":"<x X>",
+"output":[["StartTag", "x", { "x":"" }]]},
+
+{"description":"Tag/attribute name case edge values",
+"input":"<x@AZ[`az{ @AZ[`az{>",
+"output":[["StartTag", "x@az[`az{", { "@az[`az{":"" }]]},
+
+{"description":"Duplicate different-case attributes",
+"input":"<x x=1 x=2 X=3>",
+"output":["ParseError", "ParseError", ["StartTag", "x", { "x":"1" }]]},
+
+{"description":"Uppercase close tag attributes",
+"input":"</x X>",
+"output":["ParseError", ["EndTag", "x"]]},
+
+{"description":"Duplicate close tag attributes",
+"input":"</x x x>",
+"output":["ParseError", "ParseError", ["EndTag", "x"]]},
+
+{"description":"Permitted slash",
+"input":"<br/>",
+"output":[["StartTag","br",{},true]]},
+
+{"description":"Non-permitted slash",
+"input":"<xr/>",
+"output":[["StartTag","xr",{},true]]},
+
+{"description":"Permitted slash but in close tag",
+"input":"</br/>",
+"output":["ParseError", ["EndTag", "br"]]},
+
+{"description":"Doctype public case-sensitivity (1)",
+"input":"<!DoCtYpE HtMl PuBlIc \"AbC\" \"XyZ\">",
+"output":[["DOCTYPE", "html", "AbC", "XyZ", true]]},
+
+{"description":"Doctype public case-sensitivity (2)",
+"input":"<!dOcTyPe hTmL pUbLiC \"aBc\" \"xYz\">",
+"output":[["DOCTYPE", "html", "aBc", "xYz", true]]},
+
+{"description":"Doctype system case-sensitivity (1)",
+"input":"<!DoCtYpE HtMl SyStEm \"XyZ\">",
+"output":[["DOCTYPE", "html", null, "XyZ", true]]},
+
+{"description":"Doctype system case-sensitivity (2)",
+"input":"<!dOcTyPe hTmL sYsTeM \"xYz\">",
+"output":[["DOCTYPE", "html", null, "xYz", true]]},
+
+{"description":"U+0000 in lookahead region after non-matching character",
+"input":"<!doc>\u0000",
+"output":["ParseError", ["Comment", "doc"], "ParseError", ["Character", "\u0000"]],
+"ignoreErrorOrder":true},
+
+{"description":"U+0000 in lookahead region",
+"input":"<!doc\u0000",
+"output":["ParseError", ["Comment", "doc\uFFFD"]],
+"ignoreErrorOrder":true},
+
+{"description":"U+0080 in lookahead region",
+"input":"<!doc\u0080",
+"output":["ParseError", "ParseError", ["Comment", "doc\u0080"]],
+"ignoreErrorOrder":true},
+
+{"description":"U+FDD1 in lookahead region",
+"input":"<!doc\uFDD1",
+"output":["ParseError", "ParseError", ["Comment", "doc\uFDD1"]],
+"ignoreErrorOrder":true},
+
+{"description":"U+1FFFF in lookahead region",
+"input":"<!doc\uD83F\uDFFF",
+"output":["ParseError", "ParseError", ["Comment", "doc\uD83F\uDFFF"]],
+"ignoreErrorOrder":true},
+
+{"description":"CR followed by non-LF",
+"input":"\r?",
+"output":[["Character", "\n?"]]},
+
+{"description":"CR at EOF",
+"input":"\r",
+"output":[["Character", "\n"]]},
+
+{"description":"LF at EOF",
+"input":"\n",
+"output":[["Character", "\n"]]},
+
+{"description":"CR LF",
+"input":"\r\n",
+"output":[["Character", "\n"]]},
+
+{"description":"CR CR",
+"input":"\r\r",
+"output":[["Character", "\n\n"]]},
+
+{"description":"LF LF",
+"input":"\n\n",
+"output":[["Character", "\n\n"]]},
+
+{"description":"LF CR",
+"input":"\n\r",
+"output":[["Character", "\n\n"]]},
+
+{"description":"text CR CR CR text",
+"input":"text\r\r\rtext",
+"output":[["Character", "text\n\n\ntext"]]},
+
+{"description":"Doctype publik",
+"input":"<!DOCTYPE html PUBLIK \"AbC\" \"XyZ\">",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Doctype publi",
+"input":"<!DOCTYPE html PUBLI",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Doctype sistem",
+"input":"<!DOCTYPE html SISTEM \"AbC\">",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Doctype sys",
+"input":"<!DOCTYPE html SYS",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false]]},
+
+{"description":"Doctype html x>text",
+"input":"<!DOCTYPE html x>text",
+"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]},
+
+{"description":"Grave accent in unquoted attribute",
+"input":"<a a=aa`>",
+"output":["ParseError", ["StartTag", "a", {"a":"aa`"}]]},
+
+{"description":"EOF in tag name state ",
+"input":"<a",
+"output":["ParseError"]},
+
+{"description":"EOF in tag name state",
+"input":"<a",
+"output":["ParseError"]},
+
+{"description":"EOF in before attribute name state",
+"input":"<a ",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute name state",
+"input":"<a a",
+"output":["ParseError"]},
+
+{"description":"EOF in after attribute name state",
+"input":"<a a ",
+"output":["ParseError"]},
+
+{"description":"EOF in before attribute value state",
+"input":"<a a =",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (double quoted) state",
+"input":"<a a =\"a",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (single quoted) state",
+"input":"<a a ='a",
+"output":["ParseError"]},
+
+{"description":"EOF in attribute value (unquoted) state",
+"input":"<a a =a",
+"output":["ParseError"]},
+
+{"description":"EOF in after attribute value state",
+"input":"<a a ='a'",
+"output":["ParseError"]}
+
+]}
diff --git a/html5lib/tests/testdata/tokenizer/unicodeChars.test b/html5lib/tests/testdata/tokenizer/unicodeChars.test
new file mode 100644
index 00000000..c7786682
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/unicodeChars.test
@@ -0,0 +1,1295 @@
+{"tests": [
+
+{"description": "Invalid Unicode character U+0001",
+"input": "\u0001",
+"output": ["ParseError", ["Character", "\u0001"]]},
+
+{"description": "Invalid Unicode character U+0002",
+"input": "\u0002",
+"output": ["ParseError", ["Character", "\u0002"]]},
+
+{"description": "Invalid Unicode character U+0003",
+"input": "\u0003",
+"output": ["ParseError", ["Character", "\u0003"]]},
+
+{"description": "Invalid Unicode character U+0004",
+"input": "\u0004",
+"output": ["ParseError", ["Character", "\u0004"]]},
+
+{"description": "Invalid Unicode character U+0005",
+"input": "\u0005",
+"output": ["ParseError", ["Character", "\u0005"]]},
+
+{"description": "Invalid Unicode character U+0006",
+"input": "\u0006",
+"output": ["ParseError", ["Character", "\u0006"]]},
+
+{"description": "Invalid Unicode character U+0007",
+"input": "\u0007",
+"output": ["ParseError", ["Character", "\u0007"]]},
+
+{"description": "Invalid Unicode character U+0008",
+"input": "\u0008",
+"output": ["ParseError", ["Character", "\u0008"]]},
+
+{"description": "Invalid Unicode character U+000B",
+"input": "\u000B",
+"output": ["ParseError", ["Character", "\u000B"]]},
+
+{"description": "Invalid Unicode character U+000E",
+"input": "\u000E",
+"output": ["ParseError", ["Character", "\u000E"]]},
+
+{"description": "Invalid Unicode character U+000F",
+"input": "\u000F",
+"output": ["ParseError", ["Character", "\u000F"]]},
+
+{"description": "Invalid Unicode character U+0010",
+"input": "\u0010",
+"output": ["ParseError", ["Character", "\u0010"]]},
+
+{"description": "Invalid Unicode character U+0011",
+"input": "\u0011",
+"output": ["ParseError", ["Character", "\u0011"]]},
+
+{"description": "Invalid Unicode character U+0012",
+"input": "\u0012",
+"output": ["ParseError", ["Character", "\u0012"]]},
+
+{"description": "Invalid Unicode character U+0013",
+"input": "\u0013",
+"output": ["ParseError", ["Character", "\u0013"]]},
+
+{"description": "Invalid Unicode character U+0014",
+"input": "\u0014",
+"output": ["ParseError", ["Character", "\u0014"]]},
+
+{"description": "Invalid Unicode character U+0015",
+"input": "\u0015",
+"output": ["ParseError", ["Character", "\u0015"]]},
+
+{"description": "Invalid Unicode character U+0016",
+"input": "\u0016",
+"output": ["ParseError", ["Character", "\u0016"]]},
+
+{"description": "Invalid Unicode character U+0017",
+"input": "\u0017",
+"output": ["ParseError", ["Character", "\u0017"]]},
+
+{"description": "Invalid Unicode character U+0018",
+"input": "\u0018",
+"output": ["ParseError", ["Character", "\u0018"]]},
+
+{"description": "Invalid Unicode character U+0019",
+"input": "\u0019",
+"output": ["ParseError", ["Character", "\u0019"]]},
+
+{"description": "Invalid Unicode character U+001A",
+"input": "\u001A",
+"output": ["ParseError", ["Character", "\u001A"]]},
+
+{"description": "Invalid Unicode character U+001B",
+"input": "\u001B",
+"output": ["ParseError", ["Character", "\u001B"]]},
+
+{"description": "Invalid Unicode character U+001C",
+"input": "\u001C",
+"output": ["ParseError", ["Character", "\u001C"]]},
+
+{"description": "Invalid Unicode character U+001D",
+"input": "\u001D",
+"output": ["ParseError", ["Character", "\u001D"]]},
+
+{"description": "Invalid Unicode character U+001E",
+"input": "\u001E",
+"output": ["ParseError", ["Character", "\u001E"]]},
+
+{"description": "Invalid Unicode character U+001F",
+"input": "\u001F",
+"output": ["ParseError", ["Character", "\u001F"]]},
+
+{"description": "Invalid Unicode character U+007F",
+"input": "\u007F",
+"output": ["ParseError", ["Character", "\u007F"]]},
+
+{"description": "Invalid Unicode character U+FDD0",
+"input": "\uFDD0",
+"output": ["ParseError", ["Character", "\uFDD0"]]},
+
+{"description": "Invalid Unicode character U+FDD1",
+"input": "\uFDD1",
+"output": ["ParseError", ["Character", "\uFDD1"]]},
+
+{"description": "Invalid Unicode character U+FDD2",
+"input": "\uFDD2",
+"output": ["ParseError", ["Character", "\uFDD2"]]},
+
+{"description": "Invalid Unicode character U+FDD3",
+"input": "\uFDD3",
+"output": ["ParseError", ["Character", "\uFDD3"]]},
+
+{"description": "Invalid Unicode character U+FDD4",
+"input": "\uFDD4",
+"output": ["ParseError", ["Character", "\uFDD4"]]},
+
+{"description": "Invalid Unicode character U+FDD5",
+"input": "\uFDD5",
+"output": ["ParseError", ["Character", "\uFDD5"]]},
+
+{"description": "Invalid Unicode character U+FDD6",
+"input": "\uFDD6",
+"output": ["ParseError", ["Character", "\uFDD6"]]},
+
+{"description": "Invalid Unicode character U+FDD7",
+"input": "\uFDD7",
+"output": ["ParseError", ["Character", "\uFDD7"]]},
+
+{"description": "Invalid Unicode character U+FDD8",
+"input": "\uFDD8",
+"output": ["ParseError", ["Character", "\uFDD8"]]},
+
+{"description": "Invalid Unicode character U+FDD9",
+"input": "\uFDD9",
+"output": ["ParseError", ["Character", "\uFDD9"]]},
+
+{"description": "Invalid Unicode character U+FDDA",
+"input": "\uFDDA",
+"output": ["ParseError", ["Character", "\uFDDA"]]},
+
+{"description": "Invalid Unicode character U+FDDB",
+"input": "\uFDDB",
+"output": ["ParseError", ["Character", "\uFDDB"]]},
+
+{"description": "Invalid Unicode character U+FDDC",
+"input": "\uFDDC",
+"output": ["ParseError", ["Character", "\uFDDC"]]},
+
+{"description": "Invalid Unicode character U+FDDD",
+"input": "\uFDDD",
+"output": ["ParseError", ["Character", "\uFDDD"]]},
+
+{"description": "Invalid Unicode character U+FDDE",
+"input": "\uFDDE",
+"output": ["ParseError", ["Character", "\uFDDE"]]},
+
+{"description": "Invalid Unicode character U+FDDF",
+"input": "\uFDDF",
+"output": ["ParseError", ["Character", "\uFDDF"]]},
+
+{"description": "Invalid Unicode character U+FDE0",
+"input": "\uFDE0",
+"output": ["ParseError", ["Character", "\uFDE0"]]},
+
+{"description": "Invalid Unicode character U+FDE1",
+"input": "\uFDE1",
+"output": ["ParseError", ["Character", "\uFDE1"]]},
+
+{"description": "Invalid Unicode character U+FDE2",
+"input": "\uFDE2",
+"output": ["ParseError", ["Character", "\uFDE2"]]},
+
+{"description": "Invalid Unicode character U+FDE3",
+"input": "\uFDE3",
+"output": ["ParseError", ["Character", "\uFDE3"]]},
+
+{"description": "Invalid Unicode character U+FDE4",
+"input": "\uFDE4",
+"output": ["ParseError", ["Character", "\uFDE4"]]},
+
+{"description": "Invalid Unicode character U+FDE5",
+"input": "\uFDE5",
+"output": ["ParseError", ["Character", "\uFDE5"]]},
+
+{"description": "Invalid Unicode character U+FDE6",
+"input": "\uFDE6",
+"output": ["ParseError", ["Character", "\uFDE6"]]},
+
+{"description": "Invalid Unicode character U+FDE7",
+"input": "\uFDE7",
+"output": ["ParseError", ["Character", "\uFDE7"]]},
+
+{"description": "Invalid Unicode character U+FDE8",
+"input": "\uFDE8",
+"output": ["ParseError", ["Character", "\uFDE8"]]},
+
+{"description": "Invalid Unicode character U+FDE9",
+"input": "\uFDE9",
+"output": ["ParseError", ["Character", "\uFDE9"]]},
+
+{"description": "Invalid Unicode character U+FDEA",
+"input": "\uFDEA",
+"output": ["ParseError", ["Character", "\uFDEA"]]},
+
+{"description": "Invalid Unicode character U+FDEB",
+"input": "\uFDEB",
+"output": ["ParseError", ["Character", "\uFDEB"]]},
+
+{"description": "Invalid Unicode character U+FDEC",
+"input": "\uFDEC",
+"output": ["ParseError", ["Character", "\uFDEC"]]},
+
+{"description": "Invalid Unicode character U+FDED",
+"input": "\uFDED",
+"output": ["ParseError", ["Character", "\uFDED"]]},
+
+{"description": "Invalid Unicode character U+FDEE",
+"input": "\uFDEE",
+"output": ["ParseError", ["Character", "\uFDEE"]]},
+
+{"description": "Invalid Unicode character U+FDEF",
+"input": "\uFDEF",
+"output": ["ParseError", ["Character", "\uFDEF"]]},
+
+{"description": "Invalid Unicode character U+FFFE",
+"input": "\uFFFE",
+"output": ["ParseError", ["Character", "\uFFFE"]]},
+
+{"description": "Invalid Unicode character U+FFFF",
+"input": "\uFFFF",
+"output": ["ParseError", ["Character", "\uFFFF"]]},
+
+{"description": "Invalid Unicode character U+1FFFE",
+"input": "\uD83F\uDFFE",
+"output": ["ParseError", ["Character", "\uD83F\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+1FFFF",
+"input": "\uD83F\uDFFF",
+"output": ["ParseError", ["Character", "\uD83F\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+2FFFE",
+"input": "\uD87F\uDFFE",
+"output": ["ParseError", ["Character", "\uD87F\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+2FFFF",
+"input": "\uD87F\uDFFF",
+"output": ["ParseError", ["Character", "\uD87F\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+3FFFE",
+"input": "\uD8BF\uDFFE",
+"output": ["ParseError", ["Character", "\uD8BF\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+3FFFF",
+"input": "\uD8BF\uDFFF",
+"output": ["ParseError", ["Character", "\uD8BF\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+4FFFE",
+"input": "\uD8FF\uDFFE",
+"output": ["ParseError", ["Character", "\uD8FF\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+4FFFF",
+"input": "\uD8FF\uDFFF",
+"output": ["ParseError", ["Character", "\uD8FF\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+5FFFE",
+"input": "\uD93F\uDFFE",
+"output": ["ParseError", ["Character", "\uD93F\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+5FFFF",
+"input": "\uD93F\uDFFF",
+"output": ["ParseError", ["Character", "\uD93F\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+6FFFE",
+"input": "\uD97F\uDFFE",
+"output": ["ParseError", ["Character", "\uD97F\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+6FFFF",
+"input": "\uD97F\uDFFF",
+"output": ["ParseError", ["Character", "\uD97F\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+7FFFE",
+"input": "\uD9BF\uDFFE",
+"output": ["ParseError", ["Character", "\uD9BF\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+7FFFF",
+"input": "\uD9BF\uDFFF",
+"output": ["ParseError", ["Character", "\uD9BF\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+8FFFE",
+"input": "\uD9FF\uDFFE",
+"output": ["ParseError", ["Character", "\uD9FF\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+8FFFF",
+"input": "\uD9FF\uDFFF",
+"output": ["ParseError", ["Character", "\uD9FF\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+9FFFE",
+"input": "\uDA3F\uDFFE",
+"output": ["ParseError", ["Character", "\uDA3F\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+9FFFF",
+"input": "\uDA3F\uDFFF",
+"output": ["ParseError", ["Character", "\uDA3F\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+AFFFE",
+"input": "\uDA7F\uDFFE",
+"output": ["ParseError", ["Character", "\uDA7F\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+AFFFF",
+"input": "\uDA7F\uDFFF",
+"output": ["ParseError", ["Character", "\uDA7F\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+BFFFE",
+"input": "\uDABF\uDFFE",
+"output": ["ParseError", ["Character", "\uDABF\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+BFFFF",
+"input": "\uDABF\uDFFF",
+"output": ["ParseError", ["Character", "\uDABF\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+CFFFE",
+"input": "\uDAFF\uDFFE",
+"output": ["ParseError", ["Character", "\uDAFF\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+CFFFF",
+"input": "\uDAFF\uDFFF",
+"output": ["ParseError", ["Character", "\uDAFF\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+DFFFE",
+"input": "\uDB3F\uDFFE",
+"output": ["ParseError", ["Character", "\uDB3F\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+DFFFF",
+"input": "\uDB3F\uDFFF",
+"output": ["ParseError", ["Character", "\uDB3F\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+EFFFE",
+"input": "\uDB7F\uDFFE",
+"output": ["ParseError", ["Character", "\uDB7F\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+EFFFF",
+"input": "\uDB7F\uDFFF",
+"output": ["ParseError", ["Character", "\uDB7F\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+FFFFE",
+"input": "\uDBBF\uDFFE",
+"output": ["ParseError", ["Character", "\uDBBF\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+FFFFF",
+"input": "\uDBBF\uDFFF",
+"output": ["ParseError", ["Character", "\uDBBF\uDFFF"]]},
+
+{"description": "Invalid Unicode character U+10FFFE",
+"input": "\uDBFF\uDFFE",
+"output": ["ParseError", ["Character", "\uDBFF\uDFFE"]]},
+
+{"description": "Invalid Unicode character U+10FFFF",
+"input": "\uDBFF\uDFFF",
+"output": ["ParseError", ["Character", "\uDBFF\uDFFF"]]},
+
+{"description": "Valid Unicode character U+0009",
+"input": "\u0009",
+"output": [["Character", "\u0009"]]},
+
+{"description": "Valid Unicode character U+000A",
+"input": "\u000A",
+"output": [["Character", "\u000A"]]},
+
+{"description": "Valid Unicode character U+0020",
+"input": "\u0020",
+"output": [["Character", "\u0020"]]},
+
+{"description": "Valid Unicode character U+0021",
+"input": "\u0021",
+"output": [["Character", "\u0021"]]},
+
+{"description": "Valid Unicode character U+0022",
+"input": "\u0022",
+"output": [["Character", "\u0022"]]},
+
+{"description": "Valid Unicode character U+0023",
+"input": "\u0023",
+"output": [["Character", "\u0023"]]},
+
+{"description": "Valid Unicode character U+0024",
+"input": "\u0024",
+"output": [["Character", "\u0024"]]},
+
+{"description": "Valid Unicode character U+0025",
+"input": "\u0025",
+"output": [["Character", "\u0025"]]},
+
+{"description": "Valid Unicode character U+0026",
+"input": "\u0026",
+"output": [["Character", "\u0026"]]},
+
+{"description": "Valid Unicode character U+0027",
+"input": "\u0027",
+"output": [["Character", "\u0027"]]},
+
+{"description": "Valid Unicode character U+0028",
+"input": "\u0028",
+"output": [["Character", "\u0028"]]},
+
+{"description": "Valid Unicode character U+0029",
+"input": "\u0029",
+"output": [["Character", "\u0029"]]},
+
+{"description": "Valid Unicode character U+002A",
+"input": "\u002A",
+"output": [["Character", "\u002A"]]},
+
+{"description": "Valid Unicode character U+002B",
+"input": "\u002B",
+"output": [["Character", "\u002B"]]},
+
+{"description": "Valid Unicode character U+002C",
+"input": "\u002C",
+"output": [["Character", "\u002C"]]},
+
+{"description": "Valid Unicode character U+002D",
+"input": "\u002D",
+"output": [["Character", "\u002D"]]},
+
+{"description": "Valid Unicode character U+002E",
+"input": "\u002E",
+"output": [["Character", "\u002E"]]},
+
+{"description": "Valid Unicode character U+002F",
+"input": "\u002F",
+"output": [["Character", "\u002F"]]},
+
+{"description": "Valid Unicode character U+0030",
+"input": "\u0030",
+"output": [["Character", "\u0030"]]},
+
+{"description": "Valid Unicode character U+0031",
+"input": "\u0031",
+"output": [["Character", "\u0031"]]},
+
+{"description": "Valid Unicode character U+0032",
+"input": "\u0032",
+"output": [["Character", "\u0032"]]},
+
+{"description": "Valid Unicode character U+0033",
+"input": "\u0033",
+"output": [["Character", "\u0033"]]},
+
+{"description": "Valid Unicode character U+0034",
+"input": "\u0034",
+"output": [["Character", "\u0034"]]},
+
+{"description": "Valid Unicode character U+0035",
+"input": "\u0035",
+"output": [["Character", "\u0035"]]},
+
+{"description": "Valid Unicode character U+0036",
+"input": "\u0036",
+"output": [["Character", "\u0036"]]},
+
+{"description": "Valid Unicode character U+0037",
+"input": "\u0037",
+"output": [["Character", "\u0037"]]},
+
+{"description": "Valid Unicode character U+0038",
+"input": "\u0038",
+"output": [["Character", "\u0038"]]},
+
+{"description": "Valid Unicode character U+0039",
+"input": "\u0039",
+"output": [["Character", "\u0039"]]},
+
+{"description": "Valid Unicode character U+003A",
+"input": "\u003A",
+"output": [["Character", "\u003A"]]},
+
+{"description": "Valid Unicode character U+003B",
+"input": "\u003B",
+"output": [["Character", "\u003B"]]},
+
+{"description": "Valid Unicode character U+003D",
+"input": "\u003D",
+"output": [["Character", "\u003D"]]},
+
+{"description": "Valid Unicode character U+003E",
+"input": "\u003E",
+"output": [["Character", "\u003E"]]},
+
+{"description": "Valid Unicode character U+003F",
+"input": "\u003F",
+"output": [["Character", "\u003F"]]},
+
+{"description": "Valid Unicode character U+0040",
+"input": "\u0040",
+"output": [["Character", "\u0040"]]},
+
+{"description": "Valid Unicode character U+0041",
+"input": "\u0041",
+"output": [["Character", "\u0041"]]},
+
+{"description": "Valid Unicode character U+0042",
+"input": "\u0042",
+"output": [["Character", "\u0042"]]},
+
+{"description": "Valid Unicode character U+0043",
+"input": "\u0043",
+"output": [["Character", "\u0043"]]},
+
+{"description": "Valid Unicode character U+0044",
+"input": "\u0044",
+"output": [["Character", "\u0044"]]},
+
+{"description": "Valid Unicode character U+0045",
+"input": "\u0045",
+"output": [["Character", "\u0045"]]},
+
+{"description": "Valid Unicode character U+0046",
+"input": "\u0046",
+"output": [["Character", "\u0046"]]},
+
+{"description": "Valid Unicode character U+0047",
+"input": "\u0047",
+"output": [["Character", "\u0047"]]},
+
+{"description": "Valid Unicode character U+0048",
+"input": "\u0048",
+"output": [["Character", "\u0048"]]},
+
+{"description": "Valid Unicode character U+0049",
+"input": "\u0049",
+"output": [["Character", "\u0049"]]},
+
+{"description": "Valid Unicode character U+004A",
+"input": "\u004A",
+"output": [["Character", "\u004A"]]},
+
+{"description": "Valid Unicode character U+004B",
+"input": "\u004B",
+"output": [["Character", "\u004B"]]},
+
+{"description": "Valid Unicode character U+004C",
+"input": "\u004C",
+"output": [["Character", "\u004C"]]},
+
+{"description": "Valid Unicode character U+004D",
+"input": "\u004D",
+"output": [["Character", "\u004D"]]},
+
+{"description": "Valid Unicode character U+004E",
+"input": "\u004E",
+"output": [["Character", "\u004E"]]},
+
+{"description": "Valid Unicode character U+004F",
+"input": "\u004F",
+"output": [["Character", "\u004F"]]},
+
+{"description": "Valid Unicode character U+0050",
+"input": "\u0050",
+"output": [["Character", "\u0050"]]},
+
+{"description": "Valid Unicode character U+0051",
+"input": "\u0051",
+"output": [["Character", "\u0051"]]},
+
+{"description": "Valid Unicode character U+0052",
+"input": "\u0052",
+"output": [["Character", "\u0052"]]},
+
+{"description": "Valid Unicode character U+0053",
+"input": "\u0053",
+"output": [["Character", "\u0053"]]},
+
+{"description": "Valid Unicode character U+0054",
+"input": "\u0054",
+"output": [["Character", "\u0054"]]},
+
+{"description": "Valid Unicode character U+0055",
+"input": "\u0055",
+"output": [["Character", "\u0055"]]},
+
+{"description": "Valid Unicode character U+0056",
+"input": "\u0056",
+"output": [["Character", "\u0056"]]},
+
+{"description": "Valid Unicode character U+0057",
+"input": "\u0057",
+"output": [["Character", "\u0057"]]},
+
+{"description": "Valid Unicode character U+0058",
+"input": "\u0058",
+"output": [["Character", "\u0058"]]},
+
+{"description": "Valid Unicode character U+0059",
+"input": "\u0059",
+"output": [["Character", "\u0059"]]},
+
+{"description": "Valid Unicode character U+005A",
+"input": "\u005A",
+"output": [["Character", "\u005A"]]},
+
+{"description": "Valid Unicode character U+005B",
+"input": "\u005B",
+"output": [["Character", "\u005B"]]},
+
+{"description": "Valid Unicode character U+005C",
+"input": "\u005C",
+"output": [["Character", "\u005C"]]},
+
+{"description": "Valid Unicode character U+005D",
+"input": "\u005D",
+"output": [["Character", "\u005D"]]},
+
+{"description": "Valid Unicode character U+005E",
+"input": "\u005E",
+"output": [["Character", "\u005E"]]},
+
+{"description": "Valid Unicode character U+005F",
+"input": "\u005F",
+"output": [["Character", "\u005F"]]},
+
+{"description": "Valid Unicode character U+0060",
+"input": "\u0060",
+"output": [["Character", "\u0060"]]},
+
+{"description": "Valid Unicode character U+0061",
+"input": "\u0061",
+"output": [["Character", "\u0061"]]},
+
+{"description": "Valid Unicode character U+0062",
+"input": "\u0062",
+"output": [["Character", "\u0062"]]},
+
+{"description": "Valid Unicode character U+0063",
+"input": "\u0063",
+"output": [["Character", "\u0063"]]},
+
+{"description": "Valid Unicode character U+0064",
+"input": "\u0064",
+"output": [["Character", "\u0064"]]},
+
+{"description": "Valid Unicode character U+0065",
+"input": "\u0065",
+"output": [["Character", "\u0065"]]},
+
+{"description": "Valid Unicode character U+0066",
+"input": "\u0066",
+"output": [["Character", "\u0066"]]},
+
+{"description": "Valid Unicode character U+0067",
+"input": "\u0067",
+"output": [["Character", "\u0067"]]},
+
+{"description": "Valid Unicode character U+0068",
+"input": "\u0068",
+"output": [["Character", "\u0068"]]},
+
+{"description": "Valid Unicode character U+0069",
+"input": "\u0069",
+"output": [["Character", "\u0069"]]},
+
+{"description": "Valid Unicode character U+006A",
+"input": "\u006A",
+"output": [["Character", "\u006A"]]},
+
+{"description": "Valid Unicode character U+006B",
+"input": "\u006B",
+"output": [["Character", "\u006B"]]},
+
+{"description": "Valid Unicode character U+006C",
+"input": "\u006C",
+"output": [["Character", "\u006C"]]},
+
+{"description": "Valid Unicode character U+006D",
+"input": "\u006D",
+"output": [["Character", "\u006D"]]},
+
+{"description": "Valid Unicode character U+006E",
+"input": "\u006E",
+"output": [["Character", "\u006E"]]},
+
+{"description": "Valid Unicode character U+006F",
+"input": "\u006F",
+"output": [["Character", "\u006F"]]},
+
+{"description": "Valid Unicode character U+0070",
+"input": "\u0070",
+"output": [["Character", "\u0070"]]},
+
+{"description": "Valid Unicode character U+0071",
+"input": "\u0071",
+"output": [["Character", "\u0071"]]},
+
+{"description": "Valid Unicode character U+0072",
+"input": "\u0072",
+"output": [["Character", "\u0072"]]},
+
+{"description": "Valid Unicode character U+0073",
+"input": "\u0073",
+"output": [["Character", "\u0073"]]},
+
+{"description": "Valid Unicode character U+0074",
+"input": "\u0074",
+"output": [["Character", "\u0074"]]},
+
+{"description": "Valid Unicode character U+0075",
+"input": "\u0075",
+"output": [["Character", "\u0075"]]},
+
+{"description": "Valid Unicode character U+0076",
+"input": "\u0076",
+"output": [["Character", "\u0076"]]},
+
+{"description": "Valid Unicode character U+0077",
+"input": "\u0077",
+"output": [["Character", "\u0077"]]},
+
+{"description": "Valid Unicode character U+0078",
+"input": "\u0078",
+"output": [["Character", "\u0078"]]},
+
+{"description": "Valid Unicode character U+0079",
+"input": "\u0079",
+"output": [["Character", "\u0079"]]},
+
+{"description": "Valid Unicode character U+007A",
+"input": "\u007A",
+"output": [["Character", "\u007A"]]},
+
+{"description": "Valid Unicode character U+007B",
+"input": "\u007B",
+"output": [["Character", "\u007B"]]},
+
+{"description": "Valid Unicode character U+007C",
+"input": "\u007C",
+"output": [["Character", "\u007C"]]},
+
+{"description": "Valid Unicode character U+007D",
+"input": "\u007D",
+"output": [["Character", "\u007D"]]},
+
+{"description": "Valid Unicode character U+007E",
+"input": "\u007E",
+"output": [["Character", "\u007E"]]},
+
+{"description": "Valid Unicode character U+00A0",
+"input": "\u00A0",
+"output": [["Character", "\u00A0"]]},
+
+{"description": "Valid Unicode character U+00A1",
+"input": "\u00A1",
+"output": [["Character", "\u00A1"]]},
+
+{"description": "Valid Unicode character U+00A2",
+"input": "\u00A2",
+"output": [["Character", "\u00A2"]]},
+
+{"description": "Valid Unicode character U+00A3",
+"input": "\u00A3",
+"output": [["Character", "\u00A3"]]},
+
+{"description": "Valid Unicode character U+00A4",
+"input": "\u00A4",
+"output": [["Character", "\u00A4"]]},
+
+{"description": "Valid Unicode character U+00A5",
+"input": "\u00A5",
+"output": [["Character", "\u00A5"]]},
+
+{"description": "Valid Unicode character U+00A6",
+"input": "\u00A6",
+"output": [["Character", "\u00A6"]]},
+
+{"description": "Valid Unicode character U+00A7",
+"input": "\u00A7",
+"output": [["Character", "\u00A7"]]},
+
+{"description": "Valid Unicode character U+00A8",
+"input": "\u00A8",
+"output": [["Character", "\u00A8"]]},
+
+{"description": "Valid Unicode character U+00A9",
+"input": "\u00A9",
+"output": [["Character", "\u00A9"]]},
+
+{"description": "Valid Unicode character U+00AA",
+"input": "\u00AA",
+"output": [["Character", "\u00AA"]]},
+
+{"description": "Valid Unicode character U+00AB",
+"input": "\u00AB",
+"output": [["Character", "\u00AB"]]},
+
+{"description": "Valid Unicode character U+00AC",
+"input": "\u00AC",
+"output": [["Character", "\u00AC"]]},
+
+{"description": "Valid Unicode character U+00AD",
+"input": "\u00AD",
+"output": [["Character", "\u00AD"]]},
+
+{"description": "Valid Unicode character U+00AE",
+"input": "\u00AE",
+"output": [["Character", "\u00AE"]]},
+
+{"description": "Valid Unicode character U+00AF",
+"input": "\u00AF",
+"output": [["Character", "\u00AF"]]},
+
+{"description": "Valid Unicode character U+00B0",
+"input": "\u00B0",
+"output": [["Character", "\u00B0"]]},
+
+{"description": "Valid Unicode character U+00B1",
+"input": "\u00B1",
+"output": [["Character", "\u00B1"]]},
+
+{"description": "Valid Unicode character U+00B2",
+"input": "\u00B2",
+"output": [["Character", "\u00B2"]]},
+
+{"description": "Valid Unicode character U+00B3",
+"input": "\u00B3",
+"output": [["Character", "\u00B3"]]},
+
+{"description": "Valid Unicode character U+00B4",
+"input": "\u00B4",
+"output": [["Character", "\u00B4"]]},
+
+{"description": "Valid Unicode character U+00B5",
+"input": "\u00B5",
+"output": [["Character", "\u00B5"]]},
+
+{"description": "Valid Unicode character U+00B6",
+"input": "\u00B6",
+"output": [["Character", "\u00B6"]]},
+
+{"description": "Valid Unicode character U+00B7",
+"input": "\u00B7",
+"output": [["Character", "\u00B7"]]},
+
+{"description": "Valid Unicode character U+00B8",
+"input": "\u00B8",
+"output": [["Character", "\u00B8"]]},
+
+{"description": "Valid Unicode character U+00B9",
+"input": "\u00B9",
+"output": [["Character", "\u00B9"]]},
+
+{"description": "Valid Unicode character U+00BA",
+"input": "\u00BA",
+"output": [["Character", "\u00BA"]]},
+
+{"description": "Valid Unicode character U+00BB",
+"input": "\u00BB",
+"output": [["Character", "\u00BB"]]},
+
+{"description": "Valid Unicode character U+00BC",
+"input": "\u00BC",
+"output": [["Character", "\u00BC"]]},
+
+{"description": "Valid Unicode character U+00BD",
+"input": "\u00BD",
+"output": [["Character", "\u00BD"]]},
+
+{"description": "Valid Unicode character U+00BE",
+"input": "\u00BE",
+"output": [["Character", "\u00BE"]]},
+
+{"description": "Valid Unicode character U+00BF",
+"input": "\u00BF",
+"output": [["Character", "\u00BF"]]},
+
+{"description": "Valid Unicode character U+00C0",
+"input": "\u00C0",
+"output": [["Character", "\u00C0"]]},
+
+{"description": "Valid Unicode character U+00C1",
+"input": "\u00C1",
+"output": [["Character", "\u00C1"]]},
+
+{"description": "Valid Unicode character U+00C2",
+"input": "\u00C2",
+"output": [["Character", "\u00C2"]]},
+
+{"description": "Valid Unicode character U+00C3",
+"input": "\u00C3",
+"output": [["Character", "\u00C3"]]},
+
+{"description": "Valid Unicode character U+00C4",
+"input": "\u00C4",
+"output": [["Character", "\u00C4"]]},
+
+{"description": "Valid Unicode character U+00C5",
+"input": "\u00C5",
+"output": [["Character", "\u00C5"]]},
+
+{"description": "Valid Unicode character U+00C6",
+"input": "\u00C6",
+"output": [["Character", "\u00C6"]]},
+
+{"description": "Valid Unicode character U+00C7",
+"input": "\u00C7",
+"output": [["Character", "\u00C7"]]},
+
+{"description": "Valid Unicode character U+00C8",
+"input": "\u00C8",
+"output": [["Character", "\u00C8"]]},
+
+{"description": "Valid Unicode character U+00C9",
+"input": "\u00C9",
+"output": [["Character", "\u00C9"]]},
+
+{"description": "Valid Unicode character U+00CA",
+"input": "\u00CA",
+"output": [["Character", "\u00CA"]]},
+
+{"description": "Valid Unicode character U+00CB",
+"input": "\u00CB",
+"output": [["Character", "\u00CB"]]},
+
+{"description": "Valid Unicode character U+00CC",
+"input": "\u00CC",
+"output": [["Character", "\u00CC"]]},
+
+{"description": "Valid Unicode character U+00CD",
+"input": "\u00CD",
+"output": [["Character", "\u00CD"]]},
+
+{"description": "Valid Unicode character U+00CE",
+"input": "\u00CE",
+"output": [["Character", "\u00CE"]]},
+
+{"description": "Valid Unicode character U+00CF",
+"input": "\u00CF",
+"output": [["Character", "\u00CF"]]},
+
+{"description": "Valid Unicode character U+00D0",
+"input": "\u00D0",
+"output": [["Character", "\u00D0"]]},
+
+{"description": "Valid Unicode character U+00D1",
+"input": "\u00D1",
+"output": [["Character", "\u00D1"]]},
+
+{"description": "Valid Unicode character U+00D2",
+"input": "\u00D2",
+"output": [["Character", "\u00D2"]]},
+
+{"description": "Valid Unicode character U+00D3",
+"input": "\u00D3",
+"output": [["Character", "\u00D3"]]},
+
+{"description": "Valid Unicode character U+00D4",
+"input": "\u00D4",
+"output": [["Character", "\u00D4"]]},
+
+{"description": "Valid Unicode character U+00D5",
+"input": "\u00D5",
+"output": [["Character", "\u00D5"]]},
+
+{"description": "Valid Unicode character U+00D6",
+"input": "\u00D6",
+"output": [["Character", "\u00D6"]]},
+
+{"description": "Valid Unicode character U+00D7",
+"input": "\u00D7",
+"output": [["Character", "\u00D7"]]},
+
+{"description": "Valid Unicode character U+00D8",
+"input": "\u00D8",
+"output": [["Character", "\u00D8"]]},
+
+{"description": "Valid Unicode character U+00D9",
+"input": "\u00D9",
+"output": [["Character", "\u00D9"]]},
+
+{"description": "Valid Unicode character U+00DA",
+"input": "\u00DA",
+"output": [["Character", "\u00DA"]]},
+
+{"description": "Valid Unicode character U+00DB",
+"input": "\u00DB",
+"output": [["Character", "\u00DB"]]},
+
+{"description": "Valid Unicode character U+00DC",
+"input": "\u00DC",
+"output": [["Character", "\u00DC"]]},
+
+{"description": "Valid Unicode character U+00DD",
+"input": "\u00DD",
+"output": [["Character", "\u00DD"]]},
+
+{"description": "Valid Unicode character U+00DE",
+"input": "\u00DE",
+"output": [["Character", "\u00DE"]]},
+
+{"description": "Valid Unicode character U+00DF",
+"input": "\u00DF",
+"output": [["Character", "\u00DF"]]},
+
+{"description": "Valid Unicode character U+00E0",
+"input": "\u00E0",
+"output": [["Character", "\u00E0"]]},
+
+{"description": "Valid Unicode character U+00E1",
+"input": "\u00E1",
+"output": [["Character", "\u00E1"]]},
+
+{"description": "Valid Unicode character U+00E2",
+"input": "\u00E2",
+"output": [["Character", "\u00E2"]]},
+
+{"description": "Valid Unicode character U+00E3",
+"input": "\u00E3",
+"output": [["Character", "\u00E3"]]},
+
+{"description": "Valid Unicode character U+00E4",
+"input": "\u00E4",
+"output": [["Character", "\u00E4"]]},
+
+{"description": "Valid Unicode character U+00E5",
+"input": "\u00E5",
+"output": [["Character", "\u00E5"]]},
+
+{"description": "Valid Unicode character U+00E6",
+"input": "\u00E6",
+"output": [["Character", "\u00E6"]]},
+
+{"description": "Valid Unicode character U+00E7",
+"input": "\u00E7",
+"output": [["Character", "\u00E7"]]},
+
+{"description": "Valid Unicode character U+00E8",
+"input": "\u00E8",
+"output": [["Character", "\u00E8"]]},
+
+{"description": "Valid Unicode character U+00E9",
+"input": "\u00E9",
+"output": [["Character", "\u00E9"]]},
+
+{"description": "Valid Unicode character U+00EA",
+"input": "\u00EA",
+"output": [["Character", "\u00EA"]]},
+
+{"description": "Valid Unicode character U+00EB",
+"input": "\u00EB",
+"output": [["Character", "\u00EB"]]},
+
+{"description": "Valid Unicode character U+00EC",
+"input": "\u00EC",
+"output": [["Character", "\u00EC"]]},
+
+{"description": "Valid Unicode character U+00ED",
+"input": "\u00ED",
+"output": [["Character", "\u00ED"]]},
+
+{"description": "Valid Unicode character U+00EE",
+"input": "\u00EE",
+"output": [["Character", "\u00EE"]]},
+
+{"description": "Valid Unicode character U+00EF",
+"input": "\u00EF",
+"output": [["Character", "\u00EF"]]},
+
+{"description": "Valid Unicode character U+00F0",
+"input": "\u00F0",
+"output": [["Character", "\u00F0"]]},
+
+{"description": "Valid Unicode character U+00F1",
+"input": "\u00F1",
+"output": [["Character", "\u00F1"]]},
+
+{"description": "Valid Unicode character U+00F2",
+"input": "\u00F2",
+"output": [["Character", "\u00F2"]]},
+
+{"description": "Valid Unicode character U+00F3",
+"input": "\u00F3",
+"output": [["Character", "\u00F3"]]},
+
+{"description": "Valid Unicode character U+00F4",
+"input": "\u00F4",
+"output": [["Character", "\u00F4"]]},
+
+{"description": "Valid Unicode character U+00F5",
+"input": "\u00F5",
+"output": [["Character", "\u00F5"]]},
+
+{"description": "Valid Unicode character U+00F6",
+"input": "\u00F6",
+"output": [["Character", "\u00F6"]]},
+
+{"description": "Valid Unicode character U+00F7",
+"input": "\u00F7",
+"output": [["Character", "\u00F7"]]},
+
+{"description": "Valid Unicode character U+00F8",
+"input": "\u00F8",
+"output": [["Character", "\u00F8"]]},
+
+{"description": "Valid Unicode character U+00F9",
+"input": "\u00F9",
+"output": [["Character", "\u00F9"]]},
+
+{"description": "Valid Unicode character U+00FA",
+"input": "\u00FA",
+"output": [["Character", "\u00FA"]]},
+
+{"description": "Valid Unicode character U+00FB",
+"input": "\u00FB",
+"output": [["Character", "\u00FB"]]},
+
+{"description": "Valid Unicode character U+00FC",
+"input": "\u00FC",
+"output": [["Character", "\u00FC"]]},
+
+{"description": "Valid Unicode character U+00FD",
+"input": "\u00FD",
+"output": [["Character", "\u00FD"]]},
+
+{"description": "Valid Unicode character U+00FE",
+"input": "\u00FE",
+"output": [["Character", "\u00FE"]]},
+
+{"description": "Valid Unicode character U+00FF",
+"input": "\u00FF",
+"output": [["Character", "\u00FF"]]},
+
+{"description": "Valid Unicode character U+D7FF",
+"input": "\uD7FF",
+"output": [["Character", "\uD7FF"]]},
+
+{"description": "Valid Unicode character U+E000",
+"input": "\uE000",
+"output": [["Character", "\uE000"]]},
+
+{"description": "Valid Unicode character U+FDCF",
+"input": "\uFDCF",
+"output": [["Character", "\uFDCF"]]},
+
+{"description": "Valid Unicode character U+FDF0",
+"input": "\uFDF0",
+"output": [["Character", "\uFDF0"]]},
+
+{"description": "Valid Unicode character U+FFFD",
+"input": "\uFFFD",
+"output": [["Character", "\uFFFD"]]},
+
+{"description": "Valid Unicode character U+10000",
+"input": "\uD800\uDC00",
+"output": [["Character", "\uD800\uDC00"]]},
+
+{"description": "Valid Unicode character U+1FFFD",
+"input": "\uD83F\uDFFD",
+"output": [["Character", "\uD83F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+20000",
+"input": "\uD840\uDC00",
+"output": [["Character", "\uD840\uDC00"]]},
+
+{"description": "Valid Unicode character U+2FFFD",
+"input": "\uD87F\uDFFD",
+"output": [["Character", "\uD87F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+30000",
+"input": "\uD880\uDC00",
+"output": [["Character", "\uD880\uDC00"]]},
+
+{"description": "Valid Unicode character U+3FFFD",
+"input": "\uD8BF\uDFFD",
+"output": [["Character", "\uD8BF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+40000",
+"input": "\uD8C0\uDC00",
+"output": [["Character", "\uD8C0\uDC00"]]},
+
+{"description": "Valid Unicode character U+4FFFD",
+"input": "\uD8FF\uDFFD",
+"output": [["Character", "\uD8FF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+50000",
+"input": "\uD900\uDC00",
+"output": [["Character", "\uD900\uDC00"]]},
+
+{"description": "Valid Unicode character U+5FFFD",
+"input": "\uD93F\uDFFD",
+"output": [["Character", "\uD93F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+60000",
+"input": "\uD940\uDC00",
+"output": [["Character", "\uD940\uDC00"]]},
+
+{"description": "Valid Unicode character U+6FFFD",
+"input": "\uD97F\uDFFD",
+"output": [["Character", "\uD97F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+70000",
+"input": "\uD980\uDC00",
+"output": [["Character", "\uD980\uDC00"]]},
+
+{"description": "Valid Unicode character U+7FFFD",
+"input": "\uD9BF\uDFFD",
+"output": [["Character", "\uD9BF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+80000",
+"input": "\uD9C0\uDC00",
+"output": [["Character", "\uD9C0\uDC00"]]},
+
+{"description": "Valid Unicode character U+8FFFD",
+"input": "\uD9FF\uDFFD",
+"output": [["Character", "\uD9FF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+90000",
+"input": "\uDA00\uDC00",
+"output": [["Character", "\uDA00\uDC00"]]},
+
+{"description": "Valid Unicode character U+9FFFD",
+"input": "\uDA3F\uDFFD",
+"output": [["Character", "\uDA3F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+A0000",
+"input": "\uDA40\uDC00",
+"output": [["Character", "\uDA40\uDC00"]]},
+
+{"description": "Valid Unicode character U+AFFFD",
+"input": "\uDA7F\uDFFD",
+"output": [["Character", "\uDA7F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+B0000",
+"input": "\uDA80\uDC00",
+"output": [["Character", "\uDA80\uDC00"]]},
+
+{"description": "Valid Unicode character U+BFFFD",
+"input": "\uDABF\uDFFD",
+"output": [["Character", "\uDABF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+C0000",
+"input": "\uDAC0\uDC00",
+"output": [["Character", "\uDAC0\uDC00"]]},
+
+{"description": "Valid Unicode character U+CFFFD",
+"input": "\uDAFF\uDFFD",
+"output": [["Character", "\uDAFF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+D0000",
+"input": "\uDB00\uDC00",
+"output": [["Character", "\uDB00\uDC00"]]},
+
+{"description": "Valid Unicode character U+DFFFD",
+"input": "\uDB3F\uDFFD",
+"output": [["Character", "\uDB3F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+E0000",
+"input": "\uDB40\uDC00",
+"output": [["Character", "\uDB40\uDC00"]]},
+
+{"description": "Valid Unicode character U+EFFFD",
+"input": "\uDB7F\uDFFD",
+"output": [["Character", "\uDB7F\uDFFD"]]},
+
+{"description": "Valid Unicode character U+F0000",
+"input": "\uDB80\uDC00",
+"output": [["Character", "\uDB80\uDC00"]]},
+
+{"description": "Valid Unicode character U+FFFFD",
+"input": "\uDBBF\uDFFD",
+"output": [["Character", "\uDBBF\uDFFD"]]},
+
+{"description": "Valid Unicode character U+100000",
+"input": "\uDBC0\uDC00",
+"output": [["Character", "\uDBC0\uDC00"]]},
+
+{"description": "Valid Unicode character U+10FFFD",
+"input": "\uDBFF\uDFFD",
+"output": [["Character", "\uDBFF\uDFFD"]]}
+
+]}
diff --git a/html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test b/html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test
new file mode 100644
index 00000000..cf2fbe61
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test
@@ -0,0 +1,27 @@
+{"tests" : [
+{"description": "Invalid Unicode character U+DFFF",
+"doubleEscaped":true,
+"input": "\\uDFFF",
+"output":["ParseError", ["Character", "\\uFFFD"]]},
+
+{"description": "Invalid Unicode character U+D800",
+"doubleEscaped":true,
+"input": "\\uD800",
+"output":["ParseError", ["Character", "\\uFFFD"]]},
+
+{"description": "Invalid Unicode character U+DFFF with valid preceding character",
+"doubleEscaped":true,
+"input": "a\\uDFFF",
+"output":["ParseError", ["Character", "a\\uFFFD"]]},
+
+{"description": "Invalid Unicode character U+D800 with valid following character",
+"doubleEscaped":true,
+"input": "\\uD800a",
+"output":["ParseError", ["Character", "\\uFFFDa"]]},
+
+{"description":"CR followed by U+0000",
+"input":"\r\u0000",
+"output":[["Character", "\n"], "ParseError", ["Character", "\u0000"]],
+"ignoreErrorOrder":true}
+]
+}
\ No newline at end of file
diff --git a/html5lib/tests/testdata/tokenizer/xmlViolation.test b/html5lib/tests/testdata/tokenizer/xmlViolation.test
new file mode 100644
index 00000000..137d9642
--- /dev/null
+++ b/html5lib/tests/testdata/tokenizer/xmlViolation.test
@@ -0,0 +1,22 @@
+{"xmlViolationTests": [
+
+{"description":"Non-XML character",
+"input":"a\uFFFFb",
+"ignoreErrorOrder":true,
+"output":["ParseError",["Character","a\uFFFDb"]]},
+
+{"description":"Non-XML space",
+"input":"a\u000Cb",
+"ignoreErrorOrder":true,
+"output":[["Character","a b"]]},
+
+{"description":"Double hyphen in comment",
+"input":"<!-- foo -- bar -->",
+"output":["ParseError",["Comment"," foo - - bar "]]},
+
+{"description":"FF between attributes",
+"input":"<a b=''\u000Cc=''>",
+"output":[["StartTag","a",{"b":"","c":""}]]}
+]}
+
+
diff --git a/html5lib/tests/testdata/tree-construction/adoption01.dat b/html5lib/tests/testdata/tree-construction/adoption01.dat
new file mode 100644
index 00000000..787e1b01
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/adoption01.dat
@@ -0,0 +1,194 @@
+#data
+<a><p></a></p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <p>
+|       <a>
+
+#data
+<a>1<p>2</a>3</p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+
+#data
+<a>1<button>2</a>3</button>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <button>
+|       <a>
+|         "2"
+|       "3"
+
+#data
+<a>1<b>2</a>3</b>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|       <b>
+|         "2"
+|     <b>
+|       "3"
+
+#data
+<a>1<div>2<div>3</a>4</div>5</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <div>
+|       <a>
+|         "2"
+|       <div>
+|         <a>
+|           "3"
+|         "4"
+|       "5"
+
+#data
+<table><a>1<p>2</a>3</p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <p>
+|       <a>
+|         "2"
+|       "3"
+|     <table>
+
+#data
+<b><b><a><p></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <b>
+|         <a>
+|         <p>
+|           <a>
+
+#data
+<b><a><b><p></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <a>
+|         <b>
+|       <b>
+|         <p>
+|           <a>
+
+#data
+<a><b><b><p></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|         <b>
+|     <b>
+|       <b>
+|         <p>
+|           <a>
+
+#data
+<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "1"
+|       <s>
+|         id="A"
+|         "2"
+|         <b>
+|           id="B"
+|           "3"
+|     <s>
+|       id="A"
+|       <b>
+|         id="B"
+|         "4"
+|     <b>
+|       id="B"
+|       "5"
+
+#data
+<table><a>1<td>2</td>3</table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "1"
+|     <a>
+|       "3"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "2"
+
+#data
+<table>A<td>B</td>C</table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "AC"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "B"
+
+#data
+<a><svg><tr><input></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <svg svg>
+|         <svg tr>
+|           <svg input>
diff --git a/html5lib/tests/testdata/tree-construction/adoption02.dat b/html5lib/tests/testdata/tree-construction/adoption02.dat
new file mode 100644
index 00000000..d18151b4
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/adoption02.dat
@@ -0,0 +1,31 @@
+#data
+<b>1<i>2<p>3</b>4
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "1"
+|       <i>
+|         "2"
+|     <i>
+|       <p>
+|         <b>
+|           "3"
+|         "4"
+
+#data
+<a><div><style></style><address><a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <div>
+|       <a>
+|         <style>
+|       <address>
+|         <a>
+|         <a>
diff --git a/html5lib/tests/testdata/tree-construction/comments01.dat b/html5lib/tests/testdata/tree-construction/comments01.dat
new file mode 100644
index 00000000..44f18768
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/comments01.dat
@@ -0,0 +1,135 @@
+#data
+FOO<!-- BAR -->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR --!>BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR --   >BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR --   >BAZ -->
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX -->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX  -->
+|     "BAZ"
+
+#data
+FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  BAR -- <QUX> -- MUX -- >BAZ -->
+
+#data
+FOO<!---->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+FOO<!--->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+FOO<!-->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!--  -->
+|     "BAZ"
+
+#data
+<?xml version="1.0">Hi
+#errors
+#document
+| <!-- ?xml version="1.0" -->
+| <html>
+|   <head>
+|   <body>
+|     "Hi"
+
+#data
+<?xml version="1.0">
+#errors
+#document
+| <!-- ?xml version="1.0" -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?xml version
+#errors
+#document
+| <!-- ?xml version -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+FOO<!----->BAZ
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <!-- - -->
+|     "BAZ"
diff --git a/html5lib/tests/testdata/tree-construction/doctype01.dat b/html5lib/tests/testdata/tree-construction/doctype01.dat
new file mode 100644
index 00000000..ae457328
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/doctype01.dat
@@ -0,0 +1,370 @@
+#data
+<!DOCTYPE html>Hello
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!dOctYpE HtMl>Hello
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPEhtml>Hello
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE>Hello
+#errors
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE >Hello
+#errors
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato >Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato taco>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato taco "ddd>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato sYstEM>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato sYstEM    >Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE   potato       sYstEM  ggg>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM taco  >Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM 'taco"'>Hello
+#errors
+#document
+| <!DOCTYPE potato "" "taco"">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM "taco">Hello
+#errors
+#document
+| <!DOCTYPE potato "" "taco">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEM "tai'co">Hello
+#errors
+#document
+| <!DOCTYPE potato "" "tai'co">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato SYSTEMtaco "ddd">Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato grass SYSTEM taco>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIc>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIc >Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato pUbLIcgoof>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC goof>Hello
+#errors
+#document
+| <!DOCTYPE potato>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC "go'of">Hello
+#errors
+#document
+| <!DOCTYPE potato "go'of" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC 'go'of'>Hello
+#errors
+#document
+| <!DOCTYPE potato "go" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC 'go:hh   of' >Hello
+#errors
+#document
+| <!DOCTYPE potato "go:hh   of" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
+#errors
+#document
+| <!DOCTYPE potato "W3C-//dfdf" "">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+   "http://www.w3.org/TR/html4/strict.dtd">Hello
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE ...>Hello
+#errors
+#document
+| <!DOCTYPE ...>
+| <html>
+|   <head>
+|   <body>
+|     "Hello"
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [ 
+<!-- internal declarations -->
+]>
+#errors
+#document
+| <!DOCTYPE root-element>
+| <html>
+|   <head>
+|   <body>
+|     "]>"
+
+#data
+<!DOCTYPE html PUBLIC
+  "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
+    "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
+#errors
+#document
+| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
+#errors
+#document
+| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "Mine!"
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
+#errors
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+| <html>
+|   <head>
+|   <body>
diff --git a/html5lib/tests/testdata/tree-construction/domjs-unsafe.dat b/html5lib/tests/testdata/tree-construction/domjs-unsafe.dat
new file mode 100644
index 0000000000000000000000000000000000000000..905b94e43ad1b316eaa9246e7cb2c247d44c0181
GIT binary patch
literal 6639
zcmc(ky>8nu5XUnM=sPgV;7QPukunfC9Xd6ry%a$P*`iYewq;04ffRkUqL0*9=#j^x
zKHZA4C}%NEllTAe?syWJyCg1RkLTZC=lu40@%XTMc;2R|w~n)ym$`Q*vMkMV?=DF<
zZwGNKyq}DJD-QcPG#E5Roap8{P2RBy{a}wtn`#scWWF})dtj?m^1~G*r&#7D+ntJe
z9L;WKJk0T%g<qvJ^(5(W`R9YNKfiy?e2f7)-ndl_X2ttSM85R24}lGWn~}6DcFWjG
z0RXb$fpl<=1R9o945qkul~Nf(HW8X_EY*50OX;&|rEJ?ImYTp_XFICcb1hxEi7)Me
z!sX)2r>B)orSGUn<Asl(=B{H05;lIK@@ue6!%-|-f<tJ_;yM*B6f`3>gt$`Nj2*`w
zmW9q}4Pw3q^a#{}S+!}nc6Cr{4)L_+gyNtJ)>Pmv;S1>5wdhOX3+TgZ(U-!#>G4H7
zbo(qJ5lbpI1r@V<#^F-jg25qyHdAkpm2)74=^WJmP-^q4qV2N*Y>=Rnu!Ws$w~wC}
ztFKQVnL3yA`q%EV-Wc<rGd&$G``iE54ME+Sx<F_!^AN8U`p22q&%g+ZrqNbIq1!8c
zsFe^8@uT6!!MsJh-U~RU*8>@1w>>M~6L3A9{GE%v*c5FQ-9K1{=97+I%qHExX6YNe
z&Nr7;{xZ@E>0X2z%jS})4HE=~$crF=T4#oHcOb+Dl?ZAXliCc#&jVuaPQrFBV)Af1
zqIcxg9~Lfff=)_uOC@B~1@~J}gNe;#Z6$7$an}0{B{<C|#qP+EvJMDMx>S@Fw$0*$
z$OUXjEnqrqc49fHZ8tPO>Kj`NbTici!u0Tp-TKzP3>@81tFIyU4Ogi@7Vwh0QDoIE
zwS{NUBxMp^bCdU##5O%bSVR3Wj)2KkFbDe+1WaU%Cfg+u?6nD`iH1o@+}|w^uzdGJ
zME7oJIJ$s1l$3v8DI4<t8FCDRBA;eaCXz`&P|XLqE*1$C<?%6h!y?~yO?HrLn97#O
Im$JV17pVBTQUCw|

literal 0
HcmV?d00001

diff --git a/html5lib/tests/testdata/tree-construction/entities01.dat b/html5lib/tests/testdata/tree-construction/entities01.dat
new file mode 100644
index 00000000..c8073b78
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/entities01.dat
@@ -0,0 +1,603 @@
+#data
+FOO&gt;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>BAR"
+
+#data
+FOO&gtBAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>BAR"
+
+#data
+FOO&gt BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO> BAR"
+
+#data
+FOO&gt;;;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO>;;BAR"
+
+#data
+I'm &notit; I tell you
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "I'm ¬it; I tell you"
+
+#data
+I'm &notin; I tell you
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "I'm ∉ I tell you"
+
+#data
+FOO& BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO& BAR"
+
+#data
+FOO&<BAR>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&"
+|     <bar>
+
+#data
+FOO&&&&gt;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&&&>BAR"
+
+#data
+FOO&#41;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO)BAR"
+
+#data
+FOO&#x41;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOABAR"
+
+#data
+FOO&#X41;BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOABAR"
+
+#data
+FOO&#BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#BAR"
+
+#data
+FOO&#ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#ZOO"
+
+#data
+FOO&#xBAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOºR"
+
+#data
+FOO&#xZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#xZOO"
+
+#data
+FOO&#XZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO&#XZOO"
+
+#data
+FOO&#41BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO)BAR"
+
+#data
+FOO&#x41BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO䆺R"
+
+#data
+FOO&#x41ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOAZOO"
+
+#data
+FOO&#x0000;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#x0078;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOxZOO"
+
+#data
+FOO&#x0079;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOyZOO"
+
+#data
+FOO&#x0080;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO€ZOO"
+
+#data
+FOO&#x0081;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x0082;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‚ZOO"
+
+#data
+FOO&#x0083;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOƒZOO"
+
+#data
+FOO&#x0084;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO„ZOO"
+
+#data
+FOO&#x0085;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO…ZOO"
+
+#data
+FOO&#x0086;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO†ZOO"
+
+#data
+FOO&#x0087;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‡ZOO"
+
+#data
+FOO&#x0088;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOˆZOO"
+
+#data
+FOO&#x0089;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‰ZOO"
+
+#data
+FOO&#x008A;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŠZOO"
+
+#data
+FOO&#x008B;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‹ZOO"
+
+#data
+FOO&#x008C;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŒZOO"
+
+#data
+FOO&#x008D;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x008E;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŽZOO"
+
+#data
+FOO&#x008F;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x0090;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x0091;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO‘ZOO"
+
+#data
+FOO&#x0092;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO’ZOO"
+
+#data
+FOO&#x0093;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO“ZOO"
+
+#data
+FOO&#x0094;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO”ZOO"
+
+#data
+FOO&#x0095;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO•ZOO"
+
+#data
+FOO&#x0096;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO–ZOO"
+
+#data
+FOO&#x0097;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO—ZOO"
+
+#data
+FOO&#x0098;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO˜ZOO"
+
+#data
+FOO&#x0099;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO™ZOO"
+
+#data
+FOO&#x009A;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOšZOO"
+
+#data
+FOO&#x009B;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO›ZOO"
+
+#data
+FOO&#x009C;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOœZOO"
+
+#data
+FOO&#x009D;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x009E;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOžZOO"
+
+#data
+FOO&#x009F;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOŸZOO"
+
+#data
+FOO&#x00A0;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO ZOO"
+
+#data
+FOO&#xD7FF;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO퟿ZOO"
+
+#data
+FOO&#xD800;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xD801;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xDFFE;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xDFFF;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xE000;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOOZOO"
+
+#data
+FOO&#x10FFFE;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􏿾ZOO"
+
+#data
+FOO&#x1087D4;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􈟔ZOO"
+
+#data
+FOO&#x10FFFF;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO􏿿ZOO"
+
+#data
+FOO&#x110000;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
+
+#data
+FOO&#xFFFFFF;ZOO
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO�ZOO"
diff --git a/html5lib/tests/testdata/tree-construction/entities02.dat b/html5lib/tests/testdata/tree-construction/entities02.dat
new file mode 100644
index 00000000..e2fb42a0
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/entities02.dat
@@ -0,0 +1,249 @@
+#data
+<div bar="ZZ&gt;YY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>YY"
+
+#data
+<div bar="ZZ&"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar='ZZ&'></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar=ZZ&></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&"
+
+#data
+<div bar="ZZ&gt=YY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt=YY"
+
+#data
+<div bar="ZZ&gt0YY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt0YY"
+
+#data
+<div bar="ZZ&gt9YY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gt9YY"
+
+#data
+<div bar="ZZ&gtaYY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gtaYY"
+
+#data
+<div bar="ZZ&gtZYY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&gtZYY"
+
+#data
+<div bar="ZZ&gt YY"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ> YY"
+
+#data
+<div bar="ZZ&gt"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar='ZZ&gt'></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar=ZZ&gt></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ>"
+
+#data
+<div bar="ZZ&pound_id=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ£_id=23"
+
+#data
+<div bar="ZZ&prod_id=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&prod_id=23"
+
+#data
+<div bar="ZZ&pound;_id=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ£_id=23"
+
+#data
+<div bar="ZZ&prod;_id=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ∏_id=23"
+
+#data
+<div bar="ZZ&pound=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&pound=23"
+
+#data
+<div bar="ZZ&prod=23"></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       bar="ZZ&prod=23"
+
+#data
+<div>ZZ&pound_id=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£_id=23"
+
+#data
+<div>ZZ&prod_id=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ&prod_id=23"
+
+#data
+<div>ZZ&pound;_id=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£_id=23"
+
+#data
+<div>ZZ&prod;_id=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ∏_id=23"
+
+#data
+<div>ZZ&pound=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ£=23"
+
+#data
+<div>ZZ&prod=23</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "ZZ&prod=23"
diff --git a/html5lib/tests/testdata/tree-construction/html5test-com.dat b/html5lib/tests/testdata/tree-construction/html5test-com.dat
new file mode 100644
index 00000000..d7cb71db
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/html5test-com.dat
@@ -0,0 +1,246 @@
+#data
+<div<div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div<div>
+
+#data
+<div foo<bar=''>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo<bar=""
+
+#data
+<div foo=`bar`>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo="`bar`"
+
+#data
+<div \"foo=''>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       \"foo=""
+
+#data
+<a href='\nbar'></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="\nbar"
+
+#data
+<!DOCTYPE html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+&lang;&rang;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "⟨⟩"
+
+#data
+&apos;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "'"
+
+#data
+&ImaginaryI;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "ⅈ"
+
+#data
+&Kopf;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "𝕂"
+
+#data
+&notinva;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "∉"
+
+#data
+<?import namespace="foo" implementation="#bar">
+#errors
+#document
+| <!-- ?import namespace="foo" implementation="#bar" -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!--foo--bar-->
+#errors
+#document
+| <!-- foo--bar -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<![CDATA[x]]>
+#errors
+#document
+| <!-- [CDATA[x]] -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<textarea><!--</textarea>--></textarea>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--"
+|     "-->"
+
+#data
+<textarea><!--</textarea>-->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--"
+|     "-->"
+
+#data
+<style><!--</style>--></style>
+#errors
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "-->"
+
+#data
+<style><!--</style>-->
+#errors
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "-->"
+
+#data
+<ul><li>A </li> <li>B</li></ul>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         "A "
+|       " "
+|       <li>
+|         "B"
+
+#data
+<table><form><input type=hidden><input></form><div></div></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     <div>
+|     <table>
+|       <form>
+|       <input>
+|         type="hidden"
+
+#data
+<i>A<b>B<p></i>C</b>D
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "A"
+|       <b>
+|         "B"
+|     <b>
+|     <p>
+|       <b>
+|         <i>
+|         "C"
+|       "D"
+
+#data
+<div></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<svg></svg>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<math></math>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
diff --git a/html5lib/tests/testdata/tree-construction/inbody01.dat b/html5lib/tests/testdata/tree-construction/inbody01.dat
new file mode 100644
index 00000000..3f2bd374
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/inbody01.dat
@@ -0,0 +1,43 @@
+#data
+<button>1</foo>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <button>
+|       "1"
+
+#data
+<foo>12</foo>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "1"
+|       
+|         "2"
+
+#data
+<dd>1</foo>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+|       "1"
+
+#data
+<foo>1<dd>2</foo>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "1"
+|       <dd>
+|         "2"
diff --git a/html5lib/tests/testdata/tree-construction/isindex.dat b/html5lib/tests/testdata/tree-construction/isindex.dat
new file mode 100644
index 00000000..88325ffe
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/isindex.dat
@@ -0,0 +1,40 @@
+#data
+<isindex>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <hr>
+|       <label>
+|         "This is a searchable index. Enter search keywords: "
+|         <input>
+|           name="isindex"
+|       <hr>
+
+#data
+<isindex name="A" action="B" prompt="C" foo="D">
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       action="B"
+|       <hr>
+|       <label>
+|         "C"
+|         <input>
+|           foo="D"
+|           name="isindex"
+|       <hr>
+
+#data
+<form><isindex>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
diff --git a/html5lib/tests/testdata/tree-construction/pending-spec-changes-plain-text-unsafe.dat b/html5lib/tests/testdata/tree-construction/pending-spec-changes-plain-text-unsafe.dat
new file mode 100644
index 0000000000000000000000000000000000000000..a5ebb1eb285116af391137bc94beac0c8a6834b4
GIT binary patch
literal 115
zcmXZUQ3`+{41i&ucZ#9c5brYEqF^T2f`Sg8m2W@)!xxy0Am++fibh!_xp`HU=1fj=
l5Tv!*b_iUjqsV4(V_d9g>VZ9lc;ttC7t#O7YxuDS4-Zl&BR>ED

literal 0
HcmV?d00001

diff --git a/html5lib/tests/testdata/tree-construction/pending-spec-changes.dat b/html5lib/tests/testdata/tree-construction/pending-spec-changes.dat
new file mode 100644
index 00000000..5a920846
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/pending-spec-changes.dat
@@ -0,0 +1,52 @@
+#data
+<input type="hidden"><frameset>
+#errors
+21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+31: “frameset” start tag seen.
+31: End of file seen and there were open elements.
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><table><caption><svg>foo</table>bar
+#errors
+47: End tag “table” did not match the name of the current open element (“svg”).
+47: “table” closed but “caption” was still open.
+47: End tag “table” seen, but there were open elements.
+36: Unclosed element “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           "foo"
+|     "bar"
+
+#data
+<table><tr><td><svg><desc><td></desc><circle>
+#errors
+7: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+30: A table cell was implicitly closed, but there were open elements.
+26: Unclosed element “desc”.
+20: Unclosed element “svg”.
+37: Stray end tag “desc”.
+45: End of file seen and there were open elements.
+45: Unclosed element “circle”.
+7: Unclosed element “table”.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
+|             <circle>
diff --git a/html5lib/tests/testdata/tree-construction/plain-text-unsafe.dat b/html5lib/tests/testdata/tree-construction/plain-text-unsafe.dat
new file mode 100644
index 0000000000000000000000000000000000000000..04cc11fb9d458ea32dca02e2f3bf39221196ab8e
GIT binary patch
literal 4166
zcmds4&1%~~5Y}1XcNojiOL5|Zdr6iB6Q@@dnjYGa!^&DaD*8j(rYZE*N*}4O(Ai!6
zt?Ym#%MQd~yz+X#nfd0M+40P0g4rKk_ucGyu~@9HzqzhG<5`wuxjplf&5wx3!u}29
zQA8od1>ll1zgT*S|4T0c9E6$RdB?_+5>}tF$TnjU&$*!FvRd{rQXeva!GcpkGm9M!
zZBWBlo0XH%V%aC7h2%Ws8$qo;*=zDp0+b3F08~mq!5(ow4OtKi{*2LVgD~WoB_9R=
z%96mMsPR;h$nTtgfB$G~Tu5~MsAZ5p?I@Yv->g@6t9!$ThX*>G;HMo(<c>}#7Rl5a
zZg4uE1I7jOIW4nFO4J6iM;kDRJYY@HxlJ-2>|)pZu4LM<KOUh3ErDsMA{%qAZOUw$
zscvR?JZJVK)-qamv2krWRmhr-vcp#rkm+dl=W)$LH~WnyKCXT2=DO^$@Rb}6$4@S`
zDy!WdH|zeTS5P`WCOgJYv%MecK8>qS(UCIoh@(L9F*ZXarNczOPxy50-rRltbPH<s
zA!)|x#GcqI^c|On6=j}5m2?=K6mlgf$6nP%Y{F?5Ca>+lsqMcUzhGX-DG?dIeM%yw
zq)6Z5tV+moc?F-@Px$g4N7@AhG2|lSEV{6lAFkjw_958<_GvD+SlP=VmQ!lVHXJsI
znhY+?3E0d<$JA<%tK<^VtQR#nU@+CT{-PMJ<%52yKtV-o{8a81dy0d-O}vj9)n^7k
zT4d@%G%wJ%%qhlePD%yYMMpP?=tpcJ%YZV=t3+x1nKCnh=v}&mgl&nSNPf^%ki)ze
l`$yqfayHMBo}R^L^DOS^S$;Op@}8cl(m$8f+I>c;?LSw^)ujLc

literal 0
HcmV?d00001

diff --git a/html5lib/tests/testdata/tree-construction/scriptdata01.dat b/html5lib/tests/testdata/tree-construction/scriptdata01.dat
new file mode 100644
index 00000000..76b67f4b
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/scriptdata01.dat
@@ -0,0 +1,308 @@
+#data
+FOO<script>'Hello'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'Hello'"
+|     "BAR"
+
+#data
+FOO<script></script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script >BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script/>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script></script/ >BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script type="text/plain"></scriptx>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "</scriptx>BAR"
+
+#data
+FOO<script></script foo=">" dd>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|     "BAR"
+
+#data
+FOO<script>'<'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<'"
+|     "BAR"
+
+#data
+FOO<script>'<!'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!'"
+|     "BAR"
+
+#data
+FOO<script>'<!-'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-'"
+|     "BAR"
+
+#data
+FOO<script>'<!--'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!--'"
+|     "BAR"
+
+#data
+FOO<script>'<!---'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!---'"
+|     "BAR"
+
+#data
+FOO<script>'<!-->'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-->'"
+|     "BAR"
+
+#data
+FOO<script>'<!-->'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-->'"
+|     "BAR"
+
+#data
+FOO<script>'<!-- potato'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- potato'"
+|     "BAR"
+
+#data
+FOO<script>'<!-- <sCrIpt'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- <sCrIpt'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt>'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> -'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> --'</script>BAR"
+
+#data
+FOO<script>'<!-- <sCrIpt> -->'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       "'<!-- <sCrIpt> -->'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> --!>'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt> -- >'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt '</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt/'</script>BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt\'"
+|     "BAR"
+
+#data
+FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "FOO"
+|     <script>
+|       type="text/plain"
+|       "'<!-- <sCrIpt/'</script>BAR"
+|     "QUX"
diff --git a/html5lib/tests/testdata/tree-construction/tables01.dat b/html5lib/tests/testdata/tree-construction/tables01.dat
new file mode 100644
index 00000000..c4b47e48
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tables01.dat
@@ -0,0 +1,212 @@
+#data
+<table><th>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <th>
+
+#data
+<table><td>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><col foo='bar'>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|           foo="bar"
+
+#data
+<table><colgroup></html>foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <table>
+|       <colgroup>
+
+#data
+<table></table>foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|     
+|       "foo"
+
+#data
+<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><select><option>3</select></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "3"
+|     <table>
+
+#data
+<table><select><table></table></select></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|     <table>
+
+#data
+<table><select></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+
+#data
+<table><select><option>A<tr><td>B</td></tr></table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "A"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "B"
+
+#data
+<table><td></body></caption></col></colgroup></html>foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "foo"
+
+#data
+<table><td>A</table>B
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+|     "B"
+
+#data
+<table><tr><caption>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|       <caption>
+
+#data
+<table><tr></body></caption></col></colgroup></html></td></th><td>foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "foo"
+
+#data
+<table><td><tr>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|         <tr>
+
+#data
+<table><td><button><td>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <button>
+|           <td>
+
+#data
+<table><tr><td><svg><desc><td>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg desc>
+|           <td>
diff --git a/html5lib/tests/testdata/tree-construction/tests1.dat b/html5lib/tests/testdata/tree-construction/tests1.dat
new file mode 100644
index 00000000..cbf8bdda
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests1.dat
@@ -0,0 +1,1952 @@
+#data
+Test
+#errors
+Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+OneTwo
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       "One"
+|     
+|       "Two"
+
+#data
+Line1Line2Line3Line4
+#errors
+Line: 1 Col: 5 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Line1"
+|     
+|     "Line2"
+|     
+|     "Line3"
+|     
+|     "Line4"
+
+#data
+<html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<head>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>
+#errors
+Line: 1 Col: 6 Unexpected start tag (body). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head><body>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></head><body></body>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head><body></body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head></body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected end tag (body).
+Line: 1 Col: 26 Unexpected end tag (html).
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><head><body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<html><body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (body). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<head></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end tag (html). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</head>
+#errors
+Line: 1 Col: 7 Unexpected end tag (head). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</body>
+#errors
+Line: 1 Col: 7 Unexpected end tag (body). Expected DOCTYPE.
+Line: 1 Col: 7 Unexpected end tag (body) after the (implied) root element.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+</html>
+#errors
+Line: 1 Col: 7 Unexpected end tag (html). Expected DOCTYPE.
+Line: 1 Col: 7 Unexpected end tag (html) after the (implied) root element.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<b><table><td><i></table>
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 25 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<b><table><td></b><i></table>X
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 18 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 29 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 30 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+|       "X"
+
+#data
+<h1>Hello<h2>World
+#errors
+4: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+13: Heading cannot be a child of another heading.
+18: End of file seen and there were open elements.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       "Hello"
+|     <h2>
+|       "World"
+
+#data
+<a>X<a>Y</a>Z</a>
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 10 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 10 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 24 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     
+|       <a>
+|         "X"
+|       <a>
+|         "Y"
+|       "Z"
+
+#data
+<b><button>foo</b>bar
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 15 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <button>
+|       <b>
+|         "foo"
+|       "bar"
+
+#data
+<!DOCTYPE html><span><button>foo</span>bar
+#errors
+39: End tag “span” seen but there were unclosed elements.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <span>
+|       <button>
+|         "foobar"
+
+#data
+<b><div><marquee></b></div>X
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end tag (p). Ignored.
+Line: 1 Col: 24 Unexpected end tag (p). Ignored.
+Line: 1 Col: 28 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 34 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|     <div>
+|       <b>
+|         <marquee>
+|           
+|           "X"
+
+#data
+<script><div></script></div><title></title>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 28 Unexpected end tag (div). Ignored.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<div>"
+|     <title>
+|       ""
+|   <body>
+|     
+|     
+
+#data
+<!--><div>--<!-->
+#errors
+Line: 1 Col: 5 Incorrect comment.
+Line: 1 Col: 10 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 17 Incorrect comment.
+Line: 1 Col: 17 Expected closing tag. Unexpected end of file.
+#document
+| <!--  -->
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "--"
+|       <!--  -->
+
+#data
+<hr>
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end tag (p). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|     <hr>
+|     
+
+#data
+<select><b><option><select><option></b></select>X
+#errors
+Line: 1 Col: 8 Unexpected start tag (select). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected start tag token (b) in the select phase. Ignored.
+Line: 1 Col: 27 Unexpected select start tag in the select phase treated as select end tag.
+Line: 1 Col: 39 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 48 Unexpected end tag (select). Ignored.
+Line: 1 Col: 49 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|     <option>
+|       "X"
+
+#data
+<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 35 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 40 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 43 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 43 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 43 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 51 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 63 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 64 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 <table>
+|               <a>
+|     <a>
+|       <b>
+|         "X"
+|       "C"
+|     <a>
+|       "Y"
+
+#data
+<a X>0<b>1<a Y>2
+#errors
+Line: 1 Col: 5 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 15 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       x=""
+|       "0"
+|       <b>
+|         "1"
+|     <b>
+|       <a>
+|         y=""
+|         "2"
+
+#data
+<!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X-->
+#errors
+Line: 1 Col: 7 Unexpected '-' after '--' found in comment.
+Line: 1 Col: 14 Unexpected start tag (font). Expected DOCTYPE.
+Line: 1 Col: 38 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 41 Unexpected start tag (b) in table context caused voodoo mode.
+Line: 1 Col: 48 Unexpected implied end tag (b) in the table phase.
+Line: 1 Col: 48 Unexpected table cell start tag (th) in the table body phase.
+Line: 1 Col: 63 Got table cell end tag (th) while required end tags are missing.
+Line: 1 Col: 71 Unexpected end of file. Expected table content.
+#document
+| <!-- - -->
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <div>
+|         "helloexcite!"
+|         <b>
+|           "me!"
+|         <table>
+|           <tbody>
+|             <tr>
+|               <th>
+|                 <i>
+|                   "please!"
+|             <!-- X -->
+
+#data
+<!DOCTYPE html><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
+#errors
+Line: 1 Col: 61 Unexpected end tag (li). Missing end tag (body).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <li>
+|       "hello"
+|     <li>
+|       "world"
+|       <ul>
+|         "how"
+|         <li>
+|           "do"
+|       "you"
+|   <!-- do -->
+
+#data
+<!DOCTYPE html>A<option>B<optgroup>C<select>D</option>E
+#errors
+Line: 1 Col: 54 Unexpected end tag (option) in the select phase. Ignored.
+Line: 1 Col: 55 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+|     <option>
+|       "B"
+|     <optgroup>
+|       "C"
+|       <select>
+|         "DE"
+
+#data
+<
+#errors
+Line: 1 Col: 1 Expected tag name. Got something else instead
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<"
+
+#data
+<#
+#errors
+Line: 1 Col: 1 Expected tag name. Got something else instead
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<#"
+
+#data
+</
+#errors
+Line: 1 Col: 2 Expected closing tag. Unexpected end of file.
+Line: 1 Col: 2 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "</"
+
+#data
+</#
+#errors
+Line: 1 Col: 2 Expected closing tag. Unexpected character '#' found.
+Line: 1 Col: 3 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- # -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?
+#errors
+Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
+Line: 1 Col: 2 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- ? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?#
+#errors
+Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
+Line: 1 Col: 3 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- ?# -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!
+#errors
+Line: 1 Col: 2 Expected '--' or 'DOCTYPE'. Not found.
+Line: 1 Col: 2 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!--  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!#
+#errors
+Line: 1 Col: 3 Expected '--' or 'DOCTYPE'. Not found.
+Line: 1 Col: 3 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- # -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?COMMENT?>
+#errors
+Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
+Line: 1 Col: 11 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- ?COMMENT? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!COMMENT>
+#errors
+Line: 1 Col: 2 Expected '--' or 'DOCTYPE'. Not found.
+Line: 1 Col: 10 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- COMMENT -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+</ COMMENT >
+#errors
+Line: 1 Col: 2 Expected closing tag. Unexpected character ' ' found.
+Line: 1 Col: 12 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!--  COMMENT  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<?COM--MENT?>
+#errors
+Line: 1 Col: 1 Expected tag name. Got '?' instead. (HTML doesn't support processing instructions.)
+Line: 1 Col: 13 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- ?COM--MENT? -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!COM--MENT>
+#errors
+Line: 1 Col: 2 Expected '--' or 'DOCTYPE'. Not found.
+Line: 1 Col: 12 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- COM--MENT -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+</ COM--MENT >
+#errors
+Line: 1 Col: 2 Expected closing tag. Unexpected character ' ' found.
+Line: 1 Col: 14 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!--  COM--MENT  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><style> EOF
+#errors
+Line: 1 Col: 26 Unexpected end of file. Expected end tag (style).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       " EOF"
+|   <body>
+
+#data
+<!DOCTYPE html><script> <!-- </script> --> </script> EOF
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       " <!-- "
+|     " "
+|   <body>
+|     "-->  EOF"
+
+#data
+<b></b>TEST
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 10 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     
+|       <b>
+|       "TEST"
+
+#data
+<b></b>TEST
+#errors
+Line: 1 Col: 8 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected end tag (p). Ignored.
+Line: 1 Col: 23 End tag (b) violates step 1, paragraph 2 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       id="a"
+|       <b>
+|     
+|       id="b"
+|       "TEST"
+
+#data
+<b id=a><b id=b></b>TEST
+#errors
+Line: 1 Col: 8 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end tag (p). Ignored.
+Line: 1 Col: 27 End tag (b) violates step 1, paragraph 2 of the adoption agency algorithm.
+Line: 1 Col: 31 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       id="a"
+|       
+|         <b>
+|           id="b"
+|       "TEST"
+
+#data
+<!DOCTYPE html><title>U-test</title><body><div>Test<u></div></body>
+#errors
+Line: 1 Col: 61 Unexpected end tag (p). Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "U-test"
+|   <body>
+|     <div>
+|       
+|         "Test"
+|         <u>
+
+#data
+<!DOCTYPE html><font><table></font></table></font>
+#errors
+Line: 1 Col: 35 Unexpected end tag (font) in table context caused voodoo mode.
+Line: 1 Col: 35 End tag (font) violates step 1, paragraph 1 of the adoption agency algorithm.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <table>
+
+#data
+<font>hello<b>cruel</font>world
+#errors
+Line: 1 Col: 6 Unexpected start tag (font). Expected DOCTYPE.
+Line: 1 Col: 29 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 29 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 34 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|     
+|       <font>
+|         "hello"
+|         <b>
+|           "cruel"
+|       <b>
+|         "world"
+
+#data
+<b>Test</i>Test
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 11 End tag (i) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "TestTest"
+
+#data
+<b>A<cite>B<div>C
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 17 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|         <div>
+|           "C"
+
+#data
+<b>A<cite>B<div>C</cite>D
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 24 Unexpected end tag (cite). Ignored.
+Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|         <div>
+|           "CD"
+
+#data
+<b>A<cite>B<div>C</b>D
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 21 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 22 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "A"
+|       <cite>
+|         "B"
+|     <div>
+|       <b>
+|         "C"
+|       "D"
+
+#data
+
+#errors
+Line: 1 Col: 0 Unexpected End of file. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<DIV>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 5 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<DIV> abc
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 9 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc"
+
+#data
+<DIV> abc <B>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 13 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+
+#data
+<DIV> abc <B> def
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 17 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def"
+
+#data
+<DIV> abc <B> def <I>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 21 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+
+#data
+<DIV> abc <B> def <I> ghi
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi"
+
+#data
+<DIV> abc <B> def <I> ghi 
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|           
+
+#data
+<DIV> abc <B> def <I> ghi  jkl
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 33 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|           
+|             " jkl"
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 38 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|         
+|           <b>
+|             " jkl "
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B> mno
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 42 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|         
+|           <b>
+|             " jkl "
+|           " mno"
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B> mno </I>
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 47 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B> mno </I> pqr
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr"
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B> mno </I> pqr 
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 56 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr "
+
+#data
+<DIV> abc <B> def <I> ghi  jkl </B> mno </I> pqr  stu
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 38 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 47 End tag (i) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 60 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       " abc "
+|       <b>
+|         " def "
+|         <i>
+|           " ghi "
+|       <i>
+|       
+|         <i>
+|           <b>
+|             " jkl "
+|           " mno "
+|         " pqr "
+|       " stu"
+
+#data
+<test attribute---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------->
+#errors
+Line: 1 Col: 1040 Unexpected start tag (test). Expected DOCTYPE.
+Line: 1 Col: 1040 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <test>
+|       attribute----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------=""
+
+#data
+<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe
+#errors
+Line: 1 Col: 15 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 39 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 39 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 39 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 45 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 68 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 71 Expected closing tag. Unexpected end of file.
+
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "aba"
+|       <a>
+|         href="foo"
+|         "br"
+|       <a>
+|         href="foo"
+|         "x"
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|     <a>
+|       href="foo"
+|       "aoe"
+
+#data
+<a href="blah">aba<table><tr><td><a href="foo">br</td></tr>x</table>aoe
+#errors
+Line: 1 Col: 15 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 54 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 60 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 71 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "abax"
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 href="foo"
+|                 "br"
+|       "aoe"
+
+#data
+<table><a href="blah">aba<tr><td><a href="foo">br</td></tr>x</table>aoe
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 29 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 54 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 68 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 71 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="blah"
+|       "aba"
+|     <a>
+|       href="blah"
+|       "x"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <a>
+|               href="foo"
+|               "br"
+|     <a>
+|       href="blah"
+|       "aoe"
+
+#data
+<a href=a>aa<marquee>aa<a href=b>bb</marquee>aa
+#errors
+Line: 1 Col: 10 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 45 End tag (marquee) seen too early. Expected other end tag.
+Line: 1 Col: 47 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="a"
+|       "aa"
+|       <marquee>
+|         "aa"
+|         <a>
+|           href="b"
+|           "bb"
+|       "aa"
+
+#data
+<wbr><strike><code></strike><code><strike></code>
+#errors
+Line: 1 Col: 5 Unexpected start tag (wbr). Expected DOCTYPE.
+Line: 1 Col: 28 End tag (strike) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 49 Unexpected end tag (code). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+|     <strike>
+|       <code>
+|     <code>
+|       <code>
+|         <strike>
+
+#data
+<!DOCTYPE html><spacer>foo
+#errors
+26: End of file seen and there were open elements.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <spacer>
+|       "foo"
+
+#data
+<title><meta></title><link><title><meta></title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<meta>"
+|     <link>
+|     <title>
+|       "<meta>"
+|   <body>
+
+#data
+<style><!--</style><meta><script>--><link></script>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 51 Unexpected end of file. Expected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|     <meta>
+|     <script>
+|       "--><link>"
+|   <body>
+
+#data
+<head><meta></head><link>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 25 Unexpected start tag (link) that can be in head. Moved.
+#document
+| <html>
+|   <head>
+|     <meta>
+|     <link>
+|   <body>
+
+#data
+<table><tr><tr><td><td><span><th><span>X</table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 33 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 48 Got table cell end tag (th) while required end tags are missing.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|         <tr>
+|           <td>
+|           <td>
+|             <span>
+|           <th>
+|             <span>
+|               "X"
+
+#data
+<body><body><base><link><meta><title></title><body></body>
+#errors
+Line: 1 Col: 6 Unexpected start tag (body). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected start tag (body).
+Line: 1 Col: 54 Unexpected start tag (body).
+Line: 1 Col: 64 Unexpected end tag (p). Missing end tag (body).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <base>
+|     <link>
+|     <meta>
+|     <title>
+|       ""
+|     
+
+#data
+<textarea></textarea>
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       ""
+
+#data
+<image>
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 10 Unexpected start tag (image). Treated as img.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <img>
+
+#data
+<a><table><a></table><a><div><a>
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 13 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 13 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 21 Unexpected end tag (table). Expected end tag (a).
+Line: 1 Col: 27 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 27 End tag (a) violates step 1, paragraph 2 of the adoption agency algorithm.
+Line: 1 Col: 32 Unexpected end tag (p). Ignored.
+Line: 1 Col: 35 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 35 End tag (a) violates step 1, paragraph 2 of the adoption agency algorithm.
+Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|     
+|       <a>
+|     <div>
+|       <a>
+
+#data
+<head><meta>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 10 Unexpected end tag (p). Ignored.
+#document
+| <html>
+|   <head>
+|     <meta>
+|   <body>
+|     
+
+#data
+<head></html><meta>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected start tag (meta).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     
+
+#data
+<b><table><td><i></table>
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 25 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<b><table><td></b><i></table>
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 18 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 29 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <i>
+
+#data
+<h1><h2>
+#errors
+4: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+8: Heading cannot be a child of another heading.
+8: End of file seen and there were open elements.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|     <h2>
+
+#data
+<a><a></a></a>
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 9 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 21 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     
+|       <a>
+|       <a>
+
+#data
+<b><button></b></button></b>
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 15 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <button>
+|       <b>
+
+#data
+<b><div><marquee></b></div>
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end tag (p). Ignored.
+Line: 1 Col: 24 Unexpected end tag (p). Ignored.
+Line: 1 Col: 28 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 34 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 34 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|     <div>
+|       <b>
+|         <marquee>
+|           
+
+#data
+<script></script></div><title></title>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end tag (div). Ignored.
+#document
+| <html>
+|   <head>
+|     <script>
+|     <title>
+|   <body>
+|     
+|     
+
+#data
+<hr>
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end tag (p). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|     <hr>
+|     
+
+#data
+<select><b><option><select><option></b></select>
+#errors
+Line: 1 Col: 8 Unexpected start tag (select). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected start tag token (b) in the select phase. Ignored.
+Line: 1 Col: 27 Unexpected select start tag in the select phase treated as select end tag.
+Line: 1 Col: 39 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 48 Unexpected end tag (select). Ignored.
+Line: 1 Col: 48 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|     <option>
+
+#data
+<html><head><title></title><body></body></html>
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|   <body>
+
+#data
+<a><table><td><a><table></table><a></tr><a></table><a>
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 35 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 40 Got table cell end tag (td) while required end tags are missing.
+Line: 1 Col: 43 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 43 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 43 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 51 Unexpected implied end tag (a) in the table phase.
+Line: 1 Col: 54 Unexpected start tag (a) implies end tag (a).
+Line: 1 Col: 54 End tag (a) violates step 1, paragraph 2 of the adoption agency algorithm.
+Line: 1 Col: 54 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <a>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <a>
+|                 <table>
+|               <a>
+|     <a>
+
+#data
+<ul><li></li><div><li></div><li><li><div><li><address><li><b><em></b><li></ul>
+#errors
+Line: 1 Col: 4 Unexpected start tag (ul). Expected DOCTYPE.
+Line: 1 Col: 45 Missing end tag (div, li).
+Line: 1 Col: 58 Missing end tag (address, li).
+Line: 1 Col: 69 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|       <div>
+|         <li>
+|       <li>
+|       <li>
+|         <div>
+|       <li>
+|         <address>
+|       <li>
+|         <b>
+|           <em>
+|       <li>
+
+#data
+<ul><li><ul></li><li>a</li></ul></li></ul>
+#errors
+XXX: fix me
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <ul>
+|           <li>
+|             "a"
+
+#data
+<frameset><frame><frameset><frame></frameset><noframes></noframes></frameset>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+|     <frameset>
+|       <frame>
+|     <noframes>
+
+#data
+<h1><table><td><h3></table><h3></h1>
+#errors
+4: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+15: “td” start tag in table body.
+27: Unclosed elements.
+31: Heading cannot be a child of another heading.
+36: End tag “h1” seen but there were unclosed elements.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       <table>
+|         <tbody>
+|           <tr>
+|             <td>
+|               <h3>
+|     <h3>
+
+#data
+<table><colgroup><col><colgroup><col><col><col><colgroup><col><col><thead><tr><td></table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|       <colgroup>
+|         <col>
+|         <col>
+|         <col>
+|       <colgroup>
+|         <col>
+|         <col>
+|       <thead>
+|         <tr>
+|           <td>
+
+#data
+<table><col><tbody><col><tr><col><td><col></table><col>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 37 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 55 Unexpected start tag col. Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|         <tr>
+|       <colgroup>
+|         <col>
+|       <tbody>
+|         <tr>
+|           <td>
+|       <colgroup>
+|         <col>
+
+#data
+<table><colgroup><tbody><colgroup><tr><colgroup><td><colgroup></table><colgroup>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 52 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 80 Unexpected start tag colgroup. Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+|       <tbody>
+|       <colgroup>
+|       <tbody>
+|         <tr>
+|       <colgroup>
+|       <tbody>
+|         <tr>
+|           <td>
+|       <colgroup>
+
+#data
+</strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
+#errors
+Line: 1 Col: 9 Unexpected end tag (strong). Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected end tag (strong) after the (implied) root element.
+Line: 1 Col: 13 Unexpected end tag (b) after the (implied) root element.
+Line: 1 Col: 18 Unexpected end tag (em) after the (implied) root element.
+Line: 1 Col: 22 Unexpected end tag (i) after the (implied) root element.
+Line: 1 Col: 26 Unexpected end tag (u) after the (implied) root element.
+Line: 1 Col: 35 Unexpected end tag (strike) after the (implied) root element.
+Line: 1 Col: 39 Unexpected end tag (s) after the (implied) root element.
+Line: 1 Col: 47 Unexpected end tag (blink) after the (implied) root element.
+Line: 1 Col: 52 Unexpected end tag (tt) after the (implied) root element.
+Line: 1 Col: 58 Unexpected end tag (pre) after the (implied) root element.
+Line: 1 Col: 64 Unexpected end tag (big) after the (implied) root element.
+Line: 1 Col: 72 Unexpected end tag (small) after the (implied) root element.
+Line: 1 Col: 79 Unexpected end tag (font) after the (implied) root element.
+Line: 1 Col: 88 Unexpected end tag (select) after the (implied) root element.
+Line: 1 Col: 93 Unexpected end tag (h1) after the (implied) root element.
+Line: 1 Col: 98 Unexpected end tag (h2) after the (implied) root element.
+Line: 1 Col: 103 Unexpected end tag (h3) after the (implied) root element.
+Line: 1 Col: 108 Unexpected end tag (h4) after the (implied) root element.
+Line: 1 Col: 113 Unexpected end tag (h5) after the (implied) root element.
+Line: 1 Col: 118 Unexpected end tag (h6) after the (implied) root element.
+Line: 1 Col: 125 Unexpected end tag (body) after the (implied) root element.
+Line: 1 Col: 130 Unexpected end tag (br). Treated as br element.
+Line: 1 Col: 134 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 140 This element (img) has no end tag.
+Line: 1 Col: 148 Unexpected end tag (title). Ignored.
+Line: 1 Col: 155 Unexpected end tag (span). Ignored.
+Line: 1 Col: 163 Unexpected end tag (style). Ignored.
+Line: 1 Col: 172 Unexpected end tag (script). Ignored.
+Line: 1 Col: 180 Unexpected end tag (table). Ignored.
+Line: 1 Col: 185 Unexpected end tag (th). Ignored.
+Line: 1 Col: 190 Unexpected end tag (td). Ignored.
+Line: 1 Col: 195 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 203 This element (frame) has no end tag.
+Line: 1 Col: 210 This element (area) has no end tag.
+Line: 1 Col: 217 Unexpected end tag (link). Ignored.
+Line: 1 Col: 225 This element (param) has no end tag.
+Line: 1 Col: 230 This element (hr) has no end tag.
+Line: 1 Col: 238 This element (input) has no end tag.
+Line: 1 Col: 244 Unexpected end tag (col). Ignored.
+Line: 1 Col: 251 Unexpected end tag (base). Ignored.
+Line: 1 Col: 258 Unexpected end tag (meta). Ignored.
+Line: 1 Col: 269 This element (basefont) has no end tag.
+Line: 1 Col: 279 This element (bgsound) has no end tag.
+Line: 1 Col: 287 This element (embed) has no end tag.
+Line: 1 Col: 296 This element (spacer) has no end tag.
+Line: 1 Col: 300 Unexpected end tag (p). Ignored.
+Line: 1 Col: 305 End tag (dd) seen too early. Expected other end tag.
+Line: 1 Col: 310 End tag (dt) seen too early. Expected other end tag.
+Line: 1 Col: 320 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 331 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 339 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 347 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 355 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 365 End tag (address) seen too early. Expected other end tag.
+Line: 1 Col: 378 End tag (blockquote) seen too early. Expected other end tag.
+Line: 1 Col: 387 End tag (center) seen too early. Expected other end tag.
+Line: 1 Col: 393 Unexpected end tag (dir). Ignored.
+Line: 1 Col: 399 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 404 End tag (dl) seen too early. Expected other end tag.
+Line: 1 Col: 415 End tag (fieldset) seen too early. Expected other end tag.
+Line: 1 Col: 425 End tag (listing) seen too early. Expected other end tag.
+Line: 1 Col: 432 End tag (menu) seen too early. Expected other end tag.
+Line: 1 Col: 437 End tag (ol) seen too early. Expected other end tag.
+Line: 1 Col: 442 End tag (ul) seen too early. Expected other end tag.
+Line: 1 Col: 447 End tag (li) seen too early. Expected other end tag.
+Line: 1 Col: 454 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 460 This element (wbr) has no end tag.
+Line: 1 Col: 476 End tag (button) seen too early. Expected other end tag.
+Line: 1 Col: 486 End tag (marquee) seen too early. Expected other end tag.
+Line: 1 Col: 495 End tag (object) seen too early. Expected other end tag.
+Line: 1 Col: 513 Unexpected end tag (html). Ignored.
+Line: 1 Col: 513 Unexpected end tag (frameset). Ignored.
+Line: 1 Col: 520 Unexpected end tag (head). Ignored.
+Line: 1 Col: 529 Unexpected end tag (iframe). Ignored.
+Line: 1 Col: 537 This element (image) has no end tag.
+Line: 1 Col: 547 This element (isindex) has no end tag.
+Line: 1 Col: 557 Unexpected end tag (noembed). Ignored.
+Line: 1 Col: 568 Unexpected end tag (noframes). Ignored.
+Line: 1 Col: 579 Unexpected end tag (noscript). Ignored.
+Line: 1 Col: 590 Unexpected end tag (optgroup). Ignored.
+Line: 1 Col: 599 Unexpected end tag (option). Ignored.
+Line: 1 Col: 611 Unexpected end tag (plaintext). Ignored.
+Line: 1 Col: 622 Unexpected end tag (textarea). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|     
+
+#data
+<table><tr></strong></b></em></i></u></strike></s></blink></tt></pre></big></small></font></select></h1></h2></h3></h4></h5></h6></body></br></a></img></title></span></style></script></table></th></td></tr></frame></area></link></param></hr></input></col></base></meta></basefont></bgsound></embed></spacer></dd></dt></caption></colgroup></tbody></tfoot></thead></address></blockquote></center></dir></div></dl></fieldset></listing></menu></ol></ul></li></nobr></wbr></form></button></marquee></object></html></frameset></head></iframe></image></isindex></noembed></noframes></noscript></optgroup></option></plaintext></textarea>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (strong) in table context caused voodoo mode.
+Line: 1 Col: 20 End tag (strong) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 24 Unexpected end tag (b) in table context caused voodoo mode.
+Line: 1 Col: 24 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 29 Unexpected end tag (em) in table context caused voodoo mode.
+Line: 1 Col: 29 End tag (em) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 33 Unexpected end tag (i) in table context caused voodoo mode.
+Line: 1 Col: 33 End tag (i) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 37 Unexpected end tag (u) in table context caused voodoo mode.
+Line: 1 Col: 37 End tag (u) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 46 Unexpected end tag (strike) in table context caused voodoo mode.
+Line: 1 Col: 46 End tag (strike) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 50 Unexpected end tag (s) in table context caused voodoo mode.
+Line: 1 Col: 50 End tag (s) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 58 Unexpected end tag (blink) in table context caused voodoo mode.
+Line: 1 Col: 58 Unexpected end tag (blink). Ignored.
+Line: 1 Col: 63 Unexpected end tag (tt) in table context caused voodoo mode.
+Line: 1 Col: 63 End tag (tt) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 69 Unexpected end tag (pre) in table context caused voodoo mode.
+Line: 1 Col: 69 End tag (pre) seen too early. Expected other end tag.
+Line: 1 Col: 75 Unexpected end tag (big) in table context caused voodoo mode.
+Line: 1 Col: 75 End tag (big) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 83 Unexpected end tag (small) in table context caused voodoo mode.
+Line: 1 Col: 83 End tag (small) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 90 Unexpected end tag (font) in table context caused voodoo mode.
+Line: 1 Col: 90 End tag (font) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 99 Unexpected end tag (select) in table context caused voodoo mode.
+Line: 1 Col: 99 Unexpected end tag (select). Ignored.
+Line: 1 Col: 104 Unexpected end tag (h1) in table context caused voodoo mode.
+Line: 1 Col: 104 End tag (h1) seen too early. Expected other end tag.
+Line: 1 Col: 109 Unexpected end tag (h2) in table context caused voodoo mode.
+Line: 1 Col: 109 End tag (h2) seen too early. Expected other end tag.
+Line: 1 Col: 114 Unexpected end tag (h3) in table context caused voodoo mode.
+Line: 1 Col: 114 End tag (h3) seen too early. Expected other end tag.
+Line: 1 Col: 119 Unexpected end tag (h4) in table context caused voodoo mode.
+Line: 1 Col: 119 End tag (h4) seen too early. Expected other end tag.
+Line: 1 Col: 124 Unexpected end tag (h5) in table context caused voodoo mode.
+Line: 1 Col: 124 End tag (h5) seen too early. Expected other end tag.
+Line: 1 Col: 129 Unexpected end tag (h6) in table context caused voodoo mode.
+Line: 1 Col: 129 End tag (h6) seen too early. Expected other end tag.
+Line: 1 Col: 136 Unexpected end tag (body) in the table row phase. Ignored.
+Line: 1 Col: 141 Unexpected end tag (br) in table context caused voodoo mode.
+Line: 1 Col: 141 Unexpected end tag (br). Treated as br element.
+Line: 1 Col: 145 Unexpected end tag (a) in table context caused voodoo mode.
+Line: 1 Col: 145 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 151 Unexpected end tag (img) in table context caused voodoo mode.
+Line: 1 Col: 151 This element (img) has no end tag.
+Line: 1 Col: 159 Unexpected end tag (title) in table context caused voodoo mode.
+Line: 1 Col: 159 Unexpected end tag (title). Ignored.
+Line: 1 Col: 166 Unexpected end tag (span) in table context caused voodoo mode.
+Line: 1 Col: 166 Unexpected end tag (span). Ignored.
+Line: 1 Col: 174 Unexpected end tag (style) in table context caused voodoo mode.
+Line: 1 Col: 174 Unexpected end tag (style). Ignored.
+Line: 1 Col: 183 Unexpected end tag (script) in table context caused voodoo mode.
+Line: 1 Col: 183 Unexpected end tag (script). Ignored.
+Line: 1 Col: 196 Unexpected end tag (th). Ignored.
+Line: 1 Col: 201 Unexpected end tag (td). Ignored.
+Line: 1 Col: 206 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 214 This element (frame) has no end tag.
+Line: 1 Col: 221 This element (area) has no end tag.
+Line: 1 Col: 228 Unexpected end tag (link). Ignored.
+Line: 1 Col: 236 This element (param) has no end tag.
+Line: 1 Col: 241 This element (hr) has no end tag.
+Line: 1 Col: 249 This element (input) has no end tag.
+Line: 1 Col: 255 Unexpected end tag (col). Ignored.
+Line: 1 Col: 262 Unexpected end tag (base). Ignored.
+Line: 1 Col: 269 Unexpected end tag (meta). Ignored.
+Line: 1 Col: 280 This element (basefont) has no end tag.
+Line: 1 Col: 290 This element (bgsound) has no end tag.
+Line: 1 Col: 298 This element (embed) has no end tag.
+Line: 1 Col: 307 This element (spacer) has no end tag.
+Line: 1 Col: 311 Unexpected end tag (p). Ignored.
+Line: 1 Col: 316 End tag (dd) seen too early. Expected other end tag.
+Line: 1 Col: 321 End tag (dt) seen too early. Expected other end tag.
+Line: 1 Col: 331 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 342 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 350 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 358 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 366 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 376 End tag (address) seen too early. Expected other end tag.
+Line: 1 Col: 389 End tag (blockquote) seen too early. Expected other end tag.
+Line: 1 Col: 398 End tag (center) seen too early. Expected other end tag.
+Line: 1 Col: 404 Unexpected end tag (dir). Ignored.
+Line: 1 Col: 410 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 415 End tag (dl) seen too early. Expected other end tag.
+Line: 1 Col: 426 End tag (fieldset) seen too early. Expected other end tag.
+Line: 1 Col: 436 End tag (listing) seen too early. Expected other end tag.
+Line: 1 Col: 443 End tag (menu) seen too early. Expected other end tag.
+Line: 1 Col: 448 End tag (ol) seen too early. Expected other end tag.
+Line: 1 Col: 453 End tag (ul) seen too early. Expected other end tag.
+Line: 1 Col: 458 End tag (li) seen too early. Expected other end tag.
+Line: 1 Col: 465 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
+Line: 1 Col: 471 This element (wbr) has no end tag.
+Line: 1 Col: 487 End tag (button) seen too early. Expected other end tag.
+Line: 1 Col: 497 End tag (marquee) seen too early. Expected other end tag.
+Line: 1 Col: 506 End tag (object) seen too early. Expected other end tag.
+Line: 1 Col: 524 Unexpected end tag (html). Ignored.
+Line: 1 Col: 524 Unexpected end tag (frameset). Ignored.
+Line: 1 Col: 531 Unexpected end tag (head). Ignored.
+Line: 1 Col: 540 Unexpected end tag (iframe). Ignored.
+Line: 1 Col: 548 This element (image) has no end tag.
+Line: 1 Col: 558 This element (isindex) has no end tag.
+Line: 1 Col: 568 Unexpected end tag (noembed). Ignored.
+Line: 1 Col: 579 Unexpected end tag (noframes). Ignored.
+Line: 1 Col: 590 Unexpected end tag (noscript). Ignored.
+Line: 1 Col: 601 Unexpected end tag (optgroup). Ignored.
+Line: 1 Col: 610 Unexpected end tag (option). Ignored.
+Line: 1 Col: 622 Unexpected end tag (plaintext). Ignored.
+Line: 1 Col: 633 Unexpected end tag (textarea). Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|     <table>
+|       <tbody>
+|         <tr>
+|     
+
+#data
+<frameset>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 1 Col: 10 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <frameset>
diff --git a/html5lib/tests/testdata/tree-construction/tests10.dat b/html5lib/tests/testdata/tree-construction/tests10.dat
new file mode 100644
index 00000000..4f8df86f
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests10.dat
@@ -0,0 +1,799 @@
+#data
+<!DOCTYPE html><svg></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!DOCTYPE html><svg></svg><![CDATA[a]]>
+#errors
+29: Bogus comment
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <!-- [CDATA[a]] -->
+
+#data
+<!DOCTYPE html><body><svg></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!DOCTYPE html><body><select><svg></svg></select>
+#errors
+35: Stray “svg” start tag.
+42: Stray end tag “svg”
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!DOCTYPE html><body><select><option><svg></svg></option></select>
+#errors
+43: Stray “svg” start tag.
+50: Stray end tag “svg”
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!DOCTYPE html><body><table><svg></svg></table>
+#errors
+34: Start tag “svg” seen in “table”.
+41: Stray end tag “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
+#errors
+34: Start tag “svg” seen in “table”.
+46: Stray end tag “g”.
+53: Stray end tag “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
+#errors
+34: Start tag “svg” seen in “table”.
+46: Stray end tag “g”.
+58: Stray end tag “g”.
+65: Stray end tag “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
+#errors
+41: Start tag “svg” seen in “table”.
+53: Stray end tag “g”.
+65: Stray end tag “g”.
+72: Stray end tag “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+|       <tbody>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
+#errors
+45: Start tag “svg” seen in “table”.
+57: Stray end tag “g”.
+69: Stray end tag “g”.
+76: Stray end tag “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg g>
+|                 "foo"
+|               <svg g>
+|                 "bar"
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg>baz</td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg g>
+|                 "foo"
+|               <svg g>
+|                 "bar"
+|             
+|               "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg>baz</caption></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|         
+|           "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table>quux
+#errors
+70: HTML start tag “p” in a foreign namespace context.
+81: “table” closed but “caption” was still open.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|         
+|           "baz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table>quux
+#errors
+78: “table” closed but “caption” was still open.
+78: Unclosed elements on stack.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <svg svg>
+|           <svg g>
+|             "foo"
+|           <svg g>
+|             "bar"
+|           "baz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g>baz</table>quux
+#errors
+44: Start tag “svg” seen in “table”.
+56: Stray end tag “g”.
+68: Stray end tag “g”.
+71: HTML start tag “p” in a foreign namespace context.
+71: Start tag “p” seen in “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     
+|       "baz"
+|     <table>
+|       <colgroup>
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g>baz</table>quux
+#errors
+50: Stray “svg” start tag.
+54: Stray “g” start tag.
+62: Stray end tag “g”
+66: Stray “g” start tag.
+74: Stray end tag “g”
+77: Stray “p” start tag.
+88: “table” end tag with “select” open.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               "foobarbaz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g>baz</table>quux
+#errors
+36: Start tag “select” seen in “table”.
+42: Stray “svg” start tag.
+46: Stray “g” start tag.
+54: Stray end tag “g”
+58: Stray “g” start tag.
+66: Stray end tag “g”
+69: Stray “p” start tag.
+80: “table” end tag with “select” open.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "foobarbaz"
+|     <table>
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g>baz
+#errors
+41: Stray “svg” start tag.
+68: HTML start tag “p” in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     
+|       "baz"
+
+#data
+<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g>baz
+#errors
+34: Stray “svg” start tag.
+61: HTML start tag “p” in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg g>
+|         "foo"
+|       <svg g>
+|         "bar"
+|     
+|       "baz"
+
+#data
+<!DOCTYPE html><frameset><svg><g></g><g></g><span>
+#errors
+31: Stray “svg” start tag.
+35: Stray “g” start tag.
+40: Stray end tag “g”
+44: Stray “g” start tag.
+49: Stray end tag “g”
+52: Stray “p” start tag.
+58: Stray “span” start tag.
+58: End of file seen and there were open elements.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><span>
+#errors
+42: Stray “svg” start tag.
+46: Stray “g” start tag.
+51: Stray end tag “g”
+55: Stray “g” start tag.
+60: Stray end tag “g”
+63: Stray “p” start tag.
+69: Stray “span” start tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     <svg svg>
+|       xlink href="foo"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <svg svg>
+|       <svg g>
+|         xlink href="foo"
+|         xml lang="en"
+|       "bar"
+
+#data
+<svg></path>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<div><svg></div>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|     "a"
+
+#data
+<div><svg><path></div>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|     "a"
+
+#data
+<div><svg><path></svg><path>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|       <path>
+
+#data
+<div><svg><path><foreignObject><math></div>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             <math math>
+|               "a"
+
+#data
+<div><svg><path><foreignObject></div>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             
+|               "a"
+
+#data
+<!DOCTYPE html><svg><desc><div><svg><ul>a
+#errors
+40: HTML start tag “ul” in a foreign namespace context.
+41: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg desc>
+|         <div>
+|           <svg svg>
+|           <ul>
+|             "a"
+
+#data
+<!DOCTYPE html><svg><desc><svg><ul>a
+#errors
+35: HTML start tag “ul” in a foreign namespace context.
+36: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg desc>
+|         <svg svg>
+|         <ul>
+|           "a"
+
+#data
+<!DOCTYPE html><svg><desc>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <svg svg>
+|         <svg desc>
+|           
+
+#data
+<!DOCTYPE html><svg><title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <svg svg>
+|         <svg title>
+|           
+
+#data
+<div><svg><path><foreignObject></foreignObject>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <svg svg>
+|         <svg path>
+|           <svg foreignObject>
+|             
+|             
+
+#data
+<math><mi><div><object><div><span></span></div></object></div></mi><mi>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <div>
+|           <object>
+|             <div>
+|               <span>
+|       <math mi>
+
+#data
+<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <svg svg>
+|           <svg foreignObject>
+|             <div>
+|               <div>
+|       <math mi>
+
+#data
+<svg><script></script><path>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg script>
+|       <svg path>
+
+#data
+<table><svg></svg><tr>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<math><mi><mglyph>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <math mglyph>
+
+#data
+<math><mi><malignmark>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         <math malignmark>
+
+#data
+<math><mo><mglyph>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mo>
+|         <math mglyph>
+
+#data
+<math><mo><malignmark>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mo>
+|         <math malignmark>
+
+#data
+<math><mn><mglyph>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         <math mglyph>
+
+#data
+<math><mn><malignmark>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         <math malignmark>
+
+#data
+<math><ms><mglyph>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math ms>
+|         <math mglyph>
+
+#data
+<math><ms><malignmark>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math ms>
+|         <math malignmark>
+
+#data
+<math><mtext><mglyph>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         <math mglyph>
+
+#data
+<math><mtext><malignmark>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         <math malignmark>
+
+#data
+<math><annotation-xml><svg></svg></annotation-xml><mi>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|       <math mi>
+
+#data
+<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg foreignObject>
+|             <div>
+|               <math math>
+|                 <math mi>
+|               <span>
+|           <svg path>
+|       <math mi>
+
+#data
+<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg foreignObject>
+|             <math math>
+|               <math mi>
+|                 <svg svg>
+|               <math mo>
+|             <span>
+|           <svg path>
+|       <math mi>
diff --git a/html5lib/tests/testdata/tree-construction/tests11.dat b/html5lib/tests/testdata/tree-construction/tests11.dat
new file mode 100644
index 00000000..638cde47
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests11.dat
@@ -0,0 +1,482 @@
+#data
+<!DOCTYPE html><body><svg attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       contentScriptType=""
+|       contentStyleType=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       externalResourcesRequired=""
+|       filterRes=""
+|       filterUnits=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><BODY><SVG ATTRIBUTENAME='' ATTRIBUTETYPE='' BASEFREQUENCY='' BASEPROFILE='' CALCMODE='' CLIPPATHUNITS='' CONTENTSCRIPTTYPE='' CONTENTSTYLETYPE='' DIFFUSECONSTANT='' EDGEMODE='' EXTERNALRESOURCESREQUIRED='' FILTERRES='' FILTERUNITS='' GLYPHREF='' GRADIENTTRANSFORM='' GRADIENTUNITS='' KERNELMATRIX='' KERNELUNITLENGTH='' KEYPOINTS='' KEYSPLINES='' KEYTIMES='' LENGTHADJUST='' LIMITINGCONEANGLE='' MARKERHEIGHT='' MARKERUNITS='' MARKERWIDTH='' MASKCONTENTUNITS='' MASKUNITS='' NUMOCTAVES='' PATHLENGTH='' PATTERNCONTENTUNITS='' PATTERNTRANSFORM='' PATTERNUNITS='' POINTSATX='' POINTSATY='' POINTSATZ='' PRESERVEALPHA='' PRESERVEASPECTRATIO='' PRIMITIVEUNITS='' REFX='' REFY='' REPEATCOUNT='' REPEATDUR='' REQUIREDEXTENSIONS='' REQUIREDFEATURES='' SPECULARCONSTANT='' SPECULAREXPONENT='' SPREADMETHOD='' STARTOFFSET='' STDDEVIATION='' STITCHTILES='' SURFACESCALE='' SYSTEMLANGUAGE='' TABLEVALUES='' TARGETX='' TARGETY='' TEXTLENGTH='' VIEWBOX='' VIEWTARGET='' XCHANNELSELECTOR='' YCHANNELSELECTOR='' ZOOMANDPAN=''></SVG>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       contentScriptType=""
+|       contentStyleType=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       externalResourcesRequired=""
+|       filterRes=""
+|       filterUnits=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><body><svg attributename='' attributetype='' basefrequency='' baseprofile='' calcmode='' clippathunits='' contentscripttype='' contentstyletype='' diffuseconstant='' edgemode='' externalresourcesrequired='' filterres='' filterunits='' glyphref='' gradienttransform='' gradientunits='' kernelmatrix='' kernelunitlength='' keypoints='' keysplines='' keytimes='' lengthadjust='' limitingconeangle='' markerheight='' markerunits='' markerwidth='' maskcontentunits='' maskunits='' numoctaves='' pathlength='' patterncontentunits='' patterntransform='' patternunits='' pointsatx='' pointsaty='' pointsatz='' preservealpha='' preserveaspectratio='' primitiveunits='' refx='' refy='' repeatcount='' repeatdur='' requiredextensions='' requiredfeatures='' specularconstant='' specularexponent='' spreadmethod='' startoffset='' stddeviation='' stitchtiles='' surfacescale='' systemlanguage='' tablevalues='' targetx='' targety='' textlength='' viewbox='' viewtarget='' xchannelselector='' ychannelselector='' zoomandpan=''></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       attributeName=""
+|       attributeType=""
+|       baseFrequency=""
+|       baseProfile=""
+|       calcMode=""
+|       clipPathUnits=""
+|       contentScriptType=""
+|       contentStyleType=""
+|       diffuseConstant=""
+|       edgeMode=""
+|       externalResourcesRequired=""
+|       filterRes=""
+|       filterUnits=""
+|       glyphRef=""
+|       gradientTransform=""
+|       gradientUnits=""
+|       kernelMatrix=""
+|       kernelUnitLength=""
+|       keyPoints=""
+|       keySplines=""
+|       keyTimes=""
+|       lengthAdjust=""
+|       limitingConeAngle=""
+|       markerHeight=""
+|       markerUnits=""
+|       markerWidth=""
+|       maskContentUnits=""
+|       maskUnits=""
+|       numOctaves=""
+|       pathLength=""
+|       patternContentUnits=""
+|       patternTransform=""
+|       patternUnits=""
+|       pointsAtX=""
+|       pointsAtY=""
+|       pointsAtZ=""
+|       preserveAlpha=""
+|       preserveAspectRatio=""
+|       primitiveUnits=""
+|       refX=""
+|       refY=""
+|       repeatCount=""
+|       repeatDur=""
+|       requiredExtensions=""
+|       requiredFeatures=""
+|       specularConstant=""
+|       specularExponent=""
+|       spreadMethod=""
+|       startOffset=""
+|       stdDeviation=""
+|       stitchTiles=""
+|       surfaceScale=""
+|       systemLanguage=""
+|       tableValues=""
+|       targetX=""
+|       targetY=""
+|       textLength=""
+|       viewBox=""
+|       viewTarget=""
+|       xChannelSelector=""
+|       yChannelSelector=""
+|       zoomAndPan=""
+
+#data
+<!DOCTYPE html><body><math attributeName='' attributeType='' baseFrequency='' baseProfile='' calcMode='' clipPathUnits='' contentScriptType='' contentStyleType='' diffuseConstant='' edgeMode='' externalResourcesRequired='' filterRes='' filterUnits='' glyphRef='' gradientTransform='' gradientUnits='' kernelMatrix='' kernelUnitLength='' keyPoints='' keySplines='' keyTimes='' lengthAdjust='' limitingConeAngle='' markerHeight='' markerUnits='' markerWidth='' maskContentUnits='' maskUnits='' numOctaves='' pathLength='' patternContentUnits='' patternTransform='' patternUnits='' pointsAtX='' pointsAtY='' pointsAtZ='' preserveAlpha='' preserveAspectRatio='' primitiveUnits='' refX='' refY='' repeatCount='' repeatDur='' requiredExtensions='' requiredFeatures='' specularConstant='' specularExponent='' spreadMethod='' startOffset='' stdDeviation='' stitchTiles='' surfaceScale='' systemLanguage='' tableValues='' targetX='' targetY='' textLength='' viewBox='' viewTarget='' xChannelSelector='' yChannelSelector='' zoomAndPan=''></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       attributename=""
+|       attributetype=""
+|       basefrequency=""
+|       baseprofile=""
+|       calcmode=""
+|       clippathunits=""
+|       contentscripttype=""
+|       contentstyletype=""
+|       diffuseconstant=""
+|       edgemode=""
+|       externalresourcesrequired=""
+|       filterres=""
+|       filterunits=""
+|       glyphref=""
+|       gradienttransform=""
+|       gradientunits=""
+|       kernelmatrix=""
+|       kernelunitlength=""
+|       keypoints=""
+|       keysplines=""
+|       keytimes=""
+|       lengthadjust=""
+|       limitingconeangle=""
+|       markerheight=""
+|       markerunits=""
+|       markerwidth=""
+|       maskcontentunits=""
+|       maskunits=""
+|       numoctaves=""
+|       pathlength=""
+|       patterncontentunits=""
+|       patterntransform=""
+|       patternunits=""
+|       pointsatx=""
+|       pointsaty=""
+|       pointsatz=""
+|       preservealpha=""
+|       preserveaspectratio=""
+|       primitiveunits=""
+|       refx=""
+|       refy=""
+|       repeatcount=""
+|       repeatdur=""
+|       requiredextensions=""
+|       requiredfeatures=""
+|       specularconstant=""
+|       specularexponent=""
+|       spreadmethod=""
+|       startoffset=""
+|       stddeviation=""
+|       stitchtiles=""
+|       surfacescale=""
+|       systemlanguage=""
+|       tablevalues=""
+|       targetx=""
+|       targety=""
+|       textlength=""
+|       viewbox=""
+|       viewtarget=""
+|       xchannelselector=""
+|       ychannelselector=""
+|       zoomandpan=""
+
+#data
+<!DOCTYPE html><body><svg><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><body><svg><altglyph /><altglyphdef /><altglyphitem /><animatecolor /><animatemotion /><animatetransform /><clippath /><feblend /><fecolormatrix /><fecomponenttransfer /><fecomposite /><feconvolvematrix /><fediffuselighting /><fedisplacementmap /><fedistantlight /><feflood /><fefunca /><fefuncb /><fefuncg /><fefuncr /><fegaussianblur /><feimage /><femerge /><femergenode /><femorphology /><feoffset /><fepointlight /><fespecularlighting /><fespotlight /><fetile /><feturbulence /><foreignobject /><glyphref /><lineargradient /><radialgradient /><textpath /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><BODY><SVG><ALTGLYPH /><ALTGLYPHDEF /><ALTGLYPHITEM /><ANIMATECOLOR /><ANIMATEMOTION /><ANIMATETRANSFORM /><CLIPPATH /><FEBLEND /><FECOLORMATRIX /><FECOMPONENTTRANSFER /><FECOMPOSITE /><FECONVOLVEMATRIX /><FEDIFFUSELIGHTING /><FEDISPLACEMENTMAP /><FEDISTANTLIGHT /><FEFLOOD /><FEFUNCA /><FEFUNCB /><FEFUNCG /><FEFUNCR /><FEGAUSSIANBLUR /><FEIMAGE /><FEMERGE /><FEMERGENODE /><FEMORPHOLOGY /><FEOFFSET /><FEPOINTLIGHT /><FESPECULARLIGHTING /><FESPOTLIGHT /><FETILE /><FETURBULENCE /><FOREIGNOBJECT /><GLYPHREF /><LINEARGRADIENT /><RADIALGRADIENT /><TEXTPATH /></SVG>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg altGlyph>
+|       <svg altGlyphDef>
+|       <svg altGlyphItem>
+|       <svg animateColor>
+|       <svg animateMotion>
+|       <svg animateTransform>
+|       <svg clipPath>
+|       <svg feBlend>
+|       <svg feColorMatrix>
+|       <svg feComponentTransfer>
+|       <svg feComposite>
+|       <svg feConvolveMatrix>
+|       <svg feDiffuseLighting>
+|       <svg feDisplacementMap>
+|       <svg feDistantLight>
+|       <svg feFlood>
+|       <svg feFuncA>
+|       <svg feFuncB>
+|       <svg feFuncG>
+|       <svg feFuncR>
+|       <svg feGaussianBlur>
+|       <svg feImage>
+|       <svg feMerge>
+|       <svg feMergeNode>
+|       <svg feMorphology>
+|       <svg feOffset>
+|       <svg fePointLight>
+|       <svg feSpecularLighting>
+|       <svg feSpotLight>
+|       <svg feTile>
+|       <svg feTurbulence>
+|       <svg foreignObject>
+|       <svg glyphRef>
+|       <svg linearGradient>
+|       <svg radialGradient>
+|       <svg textPath>
+
+#data
+<!DOCTYPE html><body><math><altGlyph /><altGlyphDef /><altGlyphItem /><animateColor /><animateMotion /><animateTransform /><clipPath /><feBlend /><feColorMatrix /><feComponentTransfer /><feComposite /><feConvolveMatrix /><feDiffuseLighting /><feDisplacementMap /><feDistantLight /><feFlood /><feFuncA /><feFuncB /><feFuncG /><feFuncR /><feGaussianBlur /><feImage /><feMerge /><feMergeNode /><feMorphology /><feOffset /><fePointLight /><feSpecularLighting /><feSpotLight /><feTile /><feTurbulence /><foreignObject /><glyphRef /><linearGradient /><radialGradient /><textPath /></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math altglyph>
+|       <math altglyphdef>
+|       <math altglyphitem>
+|       <math animatecolor>
+|       <math animatemotion>
+|       <math animatetransform>
+|       <math clippath>
+|       <math feblend>
+|       <math fecolormatrix>
+|       <math fecomponenttransfer>
+|       <math fecomposite>
+|       <math feconvolvematrix>
+|       <math fediffuselighting>
+|       <math fedisplacementmap>
+|       <math fedistantlight>
+|       <math feflood>
+|       <math fefunca>
+|       <math fefuncb>
+|       <math fefuncg>
+|       <math fefuncr>
+|       <math fegaussianblur>
+|       <math feimage>
+|       <math femerge>
+|       <math femergenode>
+|       <math femorphology>
+|       <math feoffset>
+|       <math fepointlight>
+|       <math fespecularlighting>
+|       <math fespotlight>
+|       <math fetile>
+|       <math feturbulence>
+|       <math foreignobject>
+|       <math glyphref>
+|       <math lineargradient>
+|       <math radialgradient>
+|       <math textpath>
+
+#data
+<!DOCTYPE html><body><svg><solidColor /></svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg solidcolor>
diff --git a/html5lib/tests/testdata/tree-construction/tests12.dat b/html5lib/tests/testdata/tree-construction/tests12.dat
new file mode 100644
index 00000000..63107d27
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests12.dat
@@ -0,0 +1,62 @@
+#data
+<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       "foo"
+|       <math math>
+|         <math mtext>
+|           <i>
+|             "baz"
+|         <math annotation-xml>
+|           <svg svg>
+|             <svg desc>
+|               <b>
+|                 "eggs"
+|             <svg g>
+|               <svg foreignObject>
+|                 
+|                   "spam"
+|                 <table>
+|                   <tbody>
+|                     <tr>
+|                       <td>
+|                         <img>
+|             <svg g>
+|               "quux"
+|       "bar"
+
+#data
+<!DOCTYPE html><body>foo<math><mtext><i>baz</i></mtext><annotation-xml><svg><desc><b>eggs</b></desc><g><foreignObject>spam<TABLE><tr><td><img></td></table></foreignObject></g><g>quux</g></svg></annotation-xml></math>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <math math>
+|       <math mtext>
+|         <i>
+|           "baz"
+|       <math annotation-xml>
+|         <svg svg>
+|           <svg desc>
+|             <b>
+|               "eggs"
+|           <svg g>
+|             <svg foreignObject>
+|               
+|                 "spam"
+|               <table>
+|                 <tbody>
+|                   <tr>
+|                     <td>
+|                       <img>
+|           <svg g>
+|             "quux"
+|     "bar"
diff --git a/html5lib/tests/testdata/tree-construction/tests14.dat b/html5lib/tests/testdata/tree-construction/tests14.dat
new file mode 100644
index 00000000..b8713f88
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests14.dat
@@ -0,0 +1,74 @@
+#data
+<!DOCTYPE html><html><body><xyz:abc></xyz:abc>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xyz:abc>
+
+#data
+<!DOCTYPE html><html><body><xyz:abc></xyz:abc><span></span>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xyz:abc>
+|     <span>
+
+#data
+<!DOCTYPE html><html><html abc:def=gh><xyz:abc></xyz:abc>
+#errors
+15: Unexpected start tag html
+#document
+| <!DOCTYPE html>
+| <html>
+|   abc:def="gh"
+|   <head>
+|   <body>
+|     <xyz:abc>
+
+#data
+<!DOCTYPE html><html xml:lang=bar><html xml:lang=foo>
+#errors
+15: Unexpected start tag html
+#document
+| <!DOCTYPE html>
+| <html>
+|   xml:lang="bar"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html 123=456>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   123="456"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html 123=456><html 789=012>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   123="456"
+|   789="012"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><html><body 789=012>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     789="012"
diff --git a/html5lib/tests/testdata/tree-construction/tests15.dat b/html5lib/tests/testdata/tree-construction/tests15.dat
new file mode 100644
index 00000000..6ce1c0d1
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests15.dat
@@ -0,0 +1,208 @@
+#data
+<!DOCTYPE html><b><i><u> X
+#errors
+Line: 1 Col: 31 Unexpected end tag (p). Ignored.
+Line: 1 Col: 36 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|         <i>
+|           <u>
+|     <b>
+|       <i>
+|         <u>
+|           " "
+|           
+|             "X"
+
+#data
+<b><i><u>
+X
+#errors
+Line: 1 Col: 3 Unexpected start tag (p). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected end tag (p). Ignored.
+Line: 2 Col: 4 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|         <i>
+|           <u>
+|     <b>
+|       <i>
+|         <u>
+|           "
+"
+|           
+|             "X"
+
+#data
+<!doctype html></html> <head>
+#errors
+Line: 1 Col: 22 Unexpected end tag (html) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " "
+
+#data
+<!doctype html></body><meta>
+#errors
+Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+
+#data
+<html></html><!-- foo -->
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end tag (html) after the (implied) root element.
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  foo  -->
+
+#data
+<!doctype html></body><title>X</title>
+#errors
+Line: 1 Col: 22 Unexpected end tag (body) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+
+#data
+<!doctype html><table> X<meta></table>
+#errors
+Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 30 Unexpected start tag (meta) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " X"
+|     <meta>
+|     <table>
+
+#data
+<!doctype html><table> x</table>
+#errors
+Line: 1 Col: 24 Unexpected non-space characters in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x"
+|     <table>
+
+#data
+<!doctype html><table> x </table>
+#errors
+Line: 1 Col: 25 Unexpected non-space characters in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x "
+|     <table>
+
+#data
+<!doctype html><table><tr> x</table>
+#errors
+Line: 1 Col: 28 Unexpected non-space characters in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " x"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table>X<style> <tr>x </style> </table>
+#errors
+Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <style>
+|         " <tr>x "
+|       " "
+
+#data
+<!doctype html><div><table><a>foo</a> <tr><td>bar</td> </tr></table></div>
+#errors
+Line: 1 Col: 30 Unexpected start tag (a) in table context caused voodoo mode.
+Line: 1 Col: 37 Unexpected end tag (a) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <a>
+|         "foo"
+|       <table>
+|         " "
+|         <tbody>
+|           <tr>
+|             <td>
+|               "bar"
+|             " "
+
+#data
+<frame></frame></frame><frameset><frame><frameset><frame></frameset><noframes></frameset><noframes>
+#errors
+6: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+13: Stray start tag “frame”.
+21: Stray end tag “frame”.
+29: Stray end tag “frame”.
+39: “frameset” start tag after “body” already open.
+105: End of file seen inside an [R]CDATA element.
+105: End of file seen and there were open elements.
+XXX: These errors are wrong, please fix me!
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+|     <frameset>
+|       <frame>
+|     <noframes>
+|       "</frameset><noframes>"
+
+#data
+<!DOCTYPE html><object></html>
+#errors
+1: Expected closing tag. Unexpected end of file
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <object>
diff --git a/html5lib/tests/testdata/tree-construction/tests16.dat b/html5lib/tests/testdata/tree-construction/tests16.dat
new file mode 100644
index 00000000..c8ef66f0
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests16.dat
@@ -0,0 +1,2299 @@
+#data
+<!doctype html><script>
+#errors
+Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script>a
+#errors
+Line: 1 Col: 24 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "a"
+|   <body>
+
+#data
+<!doctype html><script><
+#errors
+Line: 1 Col: 24 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<"
+|   <body>
+
+#data
+<!doctype html><script></
+#errors
+Line: 1 Col: 25 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</"
+|   <body>
+
+#data
+<!doctype html><script></S
+#errors
+Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</S"
+|   <body>
+
+#data
+<!doctype html><script></SC
+#errors
+Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SC"
+|   <body>
+
+#data
+<!doctype html><script></SCR
+#errors
+Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCR"
+|   <body>
+
+#data
+<!doctype html><script></SCRI
+#errors
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRI"
+|   <body>
+
+#data
+<!doctype html><script></SCRIP
+#errors
+Line: 1 Col: 30 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIP"
+|   <body>
+
+#data
+<!doctype html><script></SCRIPT
+#errors
+Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIPT"
+|   <body>
+
+#data
+<!doctype html><script></SCRIPT 
+#errors
+Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script></s
+#errors
+Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</s"
+|   <body>
+
+#data
+<!doctype html><script></sc
+#errors
+Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</sc"
+|   <body>
+
+#data
+<!doctype html><script></scr
+#errors
+Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scr"
+|   <body>
+
+#data
+<!doctype html><script></scri
+#errors
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scri"
+|   <body>
+
+#data
+<!doctype html><script></scrip
+#errors
+Line: 1 Col: 30 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</scrip"
+|   <body>
+
+#data
+<!doctype html><script></script
+#errors
+Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "</script"
+|   <body>
+
+#data
+<!doctype html><script></script 
+#errors
+Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<!doctype html><script><!
+#errors
+Line: 1 Col: 25 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!"
+|   <body>
+
+#data
+<!doctype html><script><!a
+#errors
+Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!a"
+|   <body>
+
+#data
+<!doctype html><script><!-
+#errors
+Line: 1 Col: 26 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!-"
+|   <body>
+
+#data
+<!doctype html><script><!-a
+#errors
+Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!-a"
+|   <body>
+
+#data
+<!doctype html><script><!--
+#errors
+Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<!doctype html><script><!--a
+#errors
+Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--a"
+|   <body>
+
+#data
+<!doctype html><script><!--<
+#errors
+Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<"
+|   <body>
+
+#data
+<!doctype html><script><!--<a
+#errors
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<a"
+|   <body>
+
+#data
+<!doctype html><script><!--</
+#errors
+Line: 1 Col: 27 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--</"
+|   <body>
+
+#data
+<!doctype html><script><!--</script
+#errors
+Line: 1 Col: 35 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--</script"
+|   <body>
+
+#data
+<!doctype html><script><!--</script 
+#errors
+Line: 1 Col: 36 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<!doctype html><script><!--<s
+#errors
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<s"
+|   <body>
+
+#data
+<!doctype html><script><!--<script
+#errors
+Line: 1 Col: 34 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script 
+#errors
+Line: 1 Col: 35 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script <
+#errors
+Line: 1 Col: 36 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <"
+|   <body>
+
+#data
+<!doctype html><script><!--<script <a
+#errors
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </
+#errors
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </s
+#errors
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </s"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script
+#errors
+Line: 1 Col: 43 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </scripta
+#errors
+Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </scripta"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script 
+#errors
+Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script>
+#errors
+Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script>"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script/
+#errors
+Line: 1 Col: 44 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script/"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script <
+#errors
+Line: 1 Col: 45 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script <a
+#errors
+Line: 1 Col: 46 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </
+#errors
+Line: 1 Col: 46 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script
+#errors
+Line: 1 Col: 52 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script 
+#errors
+Line: 1 Col: 53 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script/
+#errors
+Line: 1 Col: 53 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script </script </script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<!doctype html><script><!--<script -
+#errors
+Line: 1 Col: 36 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -a
+#errors
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -<
+#errors
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -<"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --
+#errors
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --a
+#errors
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --a"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --<
+#errors
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --<"
+|   <body>
+
+#data
+<!doctype html><script><!--<script -->
+#errors
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --><
+#errors
+Line: 1 Col: 39 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --><"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></
+#errors
+Line: 1 Col: 40 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script
+#errors
+Line: 1 Col: 46 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></script"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script 
+#errors
+Line: 1 Col: 47 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script/
+#errors
+Line: 1 Col: 47 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script --></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script><\/script>--></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script><\/script>-->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></scr'+'ipt>--></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt>-->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>--><!--</script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>--><!--"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>-- ></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>-- >"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>- -></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- ->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>- - ></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- - >"
+|   <body>
+
+#data
+<!doctype html><script><!--<script></script><script></script>-></script>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>->"
+|   <body>
+
+#data
+<!doctype html><script><!--<script>--!></script>X
+#errors
+Line: 1 Col: 49 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script>--!></script>X"
+|   <body>
+
+#data
+<!doctype html><script><!--<scr'+'ipt></script>--></script>
+#errors
+Line: 1 Col: 59 Unexpected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<scr'+'ipt>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><script><!--<script></scr'+'ipt></script>X
+#errors
+Line: 1 Col: 57 Unexpected end of file. Expected end tag (script).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt></script>X"
+|   <body>
+
+#data
+<!doctype html><style><!--<style></style>--></style>
+#errors
+Line: 1 Col: 52 Unexpected end tag (style).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--<style>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><style><!--</style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><style><!--...</style>...--></style>
+#errors
+Line: 1 Col: 51 Unexpected end tag (style).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|   <body>
+|     "...-->"
+
+#data
+<!doctype html><style><!--<html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--<html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><style><!--...<style><!--...--!></style>--></style>
+#errors
+Line: 1 Col: 66 Unexpected end tag (style).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--...<style><!--...--!>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><style><!--...</style><!-- --><style>@import ...</style>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|     <!--   -->
+|     <style>
+|       "@import ..."
+|   <body>
+
+#data
+<!doctype html><style>...<style><!--...</style><!-- --></style>
+#errors
+Line: 1 Col: 63 Unexpected end tag (style).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "...<style><!--..."
+|     <!--   -->
+|   <body>
+
+#data
+<!doctype html><style>...<!--[if IE]><style>...</style>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <style>
+|       "...<!--[if IE]><style>..."
+|   <body>
+|     "X"
+
+#data
+<!doctype html><title><!--<title></title>--></title>
+#errors
+Line: 1 Col: 52 Unexpected end tag (title).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "<!--<title>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><title>&lt;/title></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "</title>"
+|   <body>
+
+#data
+<!doctype html><title>foo/title><link></head><body>X
+#errors
+Line: 1 Col: 52 Unexpected end of file. Expected end tag (title).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "foo/title><link></head><body>X"
+|   <body>
+
+#data
+<!doctype html><noscript><!--<noscript></noscript>--></noscript>
+#errors
+Line: 1 Col: 64 Unexpected end tag (noscript).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--<noscript>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "X"
+|     <noscript>
+|       "-->"
+
+#data
+<!doctype html><noscript><iframe></noscript>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|       "<iframe>"
+|   <body>
+|     "X"
+
+#data
+<!doctype html><noframes><!--<noframes></noframes>--></noframes>
+#errors
+Line: 1 Col: 64 Unexpected end tag (noframes).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noframes>
+|       "<!--<noframes>"
+|   <body>
+|     "-->"
+
+#data
+<!doctype html><noframes><body><script><!--...</script></body></noframes></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noframes>
+|       "<body><script><!--...</script></body>"
+|   <body>
+
+#data
+<!doctype html><textarea><!--<textarea></textarea>--></textarea>
+#errors
+Line: 1 Col: 64 Unexpected end tag (textarea).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--<textarea>"
+|     "-->"
+
+#data
+<!doctype html><textarea>&lt;/textarea></textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "</textarea>"
+
+#data
+<!doctype html><textarea>&lt;</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<"
+
+#data
+<!doctype html><textarea>a&lt;b</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "a<b"
+
+#data
+<!doctype html><iframe><!--<iframe></iframe>--></iframe>
+#errors
+Line: 1 Col: 56 Unexpected end tag (iframe).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "<!--<iframe>"
+|     "-->"
+
+#data
+<!doctype html><iframe>...<!--X->...<!--/X->...</iframe>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "...<!--X->...<!--/X->..."
+
+#data
+<!doctype html><xmp><!--<xmp></xmp>--></xmp>
+#errors
+Line: 1 Col: 44 Unexpected end tag (xmp).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       "<!--<xmp>"
+|     "-->"
+
+#data
+<!doctype html><noembed><!--<noembed></noembed>--></noembed>
+#errors
+Line: 1 Col: 60 Unexpected end tag (noembed).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <noembed>
+|       "<!--<noembed>"
+|     "-->"
+
+#data
+<script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 8 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script>a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "a"
+|   <body>
+
+#data
+<script><
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<"
+|   <body>
+
+#data
+<script></
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 10 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</"
+|   <body>
+
+#data
+<script></S
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</S"
+|   <body>
+
+#data
+<script></SC
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SC"
+|   <body>
+
+#data
+<script></SCR
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCR"
+|   <body>
+
+#data
+<script></SCRI
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRI"
+|   <body>
+
+#data
+<script></SCRIP
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 15 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIP"
+|   <body>
+
+#data
+<script></SCRIPT
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</SCRIPT"
+|   <body>
+
+#data
+<script></SCRIPT 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 17 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script></s
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</s"
+|   <body>
+
+#data
+<script></sc
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</sc"
+|   <body>
+
+#data
+<script></scr
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scr"
+|   <body>
+
+#data
+<script></scri
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scri"
+|   <body>
+
+#data
+<script></scrip
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 15 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</scrip"
+|   <body>
+
+#data
+<script></script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</script"
+|   <body>
+
+#data
+<script></script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 17 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<script><!
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 10 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!"
+|   <body>
+
+#data
+<script><!a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!a"
+|   <body>
+
+#data
+<script><!-
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!-"
+|   <body>
+
+#data
+<script><!-a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!-a"
+|   <body>
+
+#data
+<script><!--
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<script><!--a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--a"
+|   <body>
+
+#data
+<script><!--<
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<"
+|   <body>
+
+#data
+<script><!--<a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<a"
+|   <body>
+
+#data
+<script><!--</
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--</"
+|   <body>
+
+#data
+<script><!--</script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--</script"
+|   <body>
+
+#data
+<script><!--</script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 21 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--"
+|   <body>
+
+#data
+<script><!--<s
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<s"
+|   <body>
+
+#data
+<script><!--<script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script"
+|   <body>
+
+#data
+<script><!--<script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script "
+|   <body>
+
+#data
+<script><!--<script <
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 21 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <"
+|   <body>
+
+#data
+<script><!--<script <a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script <a"
+|   <body>
+
+#data
+<script><!--<script </
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </"
+|   <body>
+
+#data
+<script><!--<script </s
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </s"
+|   <body>
+
+#data
+<script><!--<script </script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 28 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script"
+|   <body>
+
+#data
+<script><!--<script </scripta
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </scripta"
+|   <body>
+
+#data
+<script><!--<script </script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script>"
+|   <body>
+
+#data
+<script><!--<script </script/
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 29 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script/"
+|   <body>
+
+#data
+<script><!--<script </script <
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 30 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <"
+|   <body>
+
+#data
+<script><!--<script </script <a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script <a"
+|   <body>
+
+#data
+<script><!--<script </script </
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </"
+|   <body>
+
+#data
+<script><!--<script </script </script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script </script"
+|   <body>
+
+#data
+<script><!--<script </script </script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script </script/
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 38 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script </script </script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script </script "
+|   <body>
+
+#data
+<script><!--<script -
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 21 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -"
+|   <body>
+
+#data
+<script><!--<script -a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -a"
+|   <body>
+
+#data
+<script><!--<script --
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --"
+|   <body>
+
+#data
+<script><!--<script --a
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --a"
+|   <body>
+
+#data
+<script><!--<script -->
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --><
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 24 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --><"
+|   <body>
+
+#data
+<script><!--<script --></
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 25 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></"
+|   <body>
+
+#data
+<script><!--<script --></script
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 31 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script --></script"
+|   <body>
+
+#data
+<script><!--<script --></script 
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --></script/
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 32 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script --></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script -->"
+|   <body>
+
+#data
+<script><!--<script><\/script>--></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script><\/script>-->"
+|   <body>
+
+#data
+<script><!--<script></scr'+'ipt>--></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt>-->"
+|   <body>
+
+#data
+<script><!--<script></script><script></script></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>--><!--</script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>--><!--"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>-- ></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>-- >"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>- -></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- ->"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>- - ></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>- - >"
+|   <body>
+
+#data
+<script><!--<script></script><script></script>-></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></script><script></script>->"
+|   <body>
+
+#data
+<script><!--<script>--!></script>X
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 34 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script>--!></script>X"
+|   <body>
+
+#data
+<script><!--<scr'+'ipt></script>--></script>
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 44 Unexpected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<scr'+'ipt>"
+|   <body>
+|     "-->"
+
+#data
+<script><!--<script></scr'+'ipt></script>X
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 42 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "<!--<script></scr'+'ipt></script>X"
+|   <body>
+
+#data
+<style><!--<style></style>--></style>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 37 Unexpected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--<style>"
+|   <body>
+|     "-->"
+
+#data
+<style><!--</style>X
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--"
+|   <body>
+|     "X"
+
+#data
+<style><!--...</style>...--></style>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 36 Unexpected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|   <body>
+|     "...-->"
+
+#data
+<style><!--<html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style></style>X
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--<html xmlns:v="urn:schemas-microsoft-com:vml"><!--[if !mso]><style>"
+|   <body>
+|     "X"
+
+#data
+<style><!--...<style><!--...--!></style>--></style>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 51 Unexpected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--...<style><!--...--!>"
+|   <body>
+|     "-->"
+
+#data
+<style><!--...</style><!-- --><style>@import ...</style>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       "<!--..."
+|     <!--   -->
+|     <style>
+|       "@import ..."
+|   <body>
+
+#data
+<style>...<style><!--...</style><!-- --></style>
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 48 Unexpected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       "...<style><!--..."
+|     <!--   -->
+|   <body>
+
+#data
+<style>...<!--[if IE]><style>...</style>X
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       "...<!--[if IE]><style>..."
+|   <body>
+|     "X"
+
+#data
+<title><!--<title></title>--></title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+Line: 1 Col: 37 Unexpected end tag (title).
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--<title>"
+|   <body>
+|     "-->"
+
+#data
+<title>&lt;/title></title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|       "</title>"
+|   <body>
+
+#data
+<title>foo/title><link></head><body>X
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+Line: 1 Col: 37 Unexpected end of file. Expected end tag (title).
+#document
+| <html>
+|   <head>
+|     <title>
+|       "foo/title><link></head><body>X"
+|   <body>
+
+#data
+<noscript><!--<noscript></noscript>--></noscript>
+#errors
+Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+Line: 1 Col: 49 Unexpected end tag (noscript).
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--<noscript>"
+|   <body>
+|     "-->"
+
+#data
+<noscript><!--</noscript>X<noscript>--></noscript>
+#errors
+Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "X"
+|     <noscript>
+|       "-->"
+
+#data
+<noscript><iframe></noscript>X
+#errors
+Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<iframe>"
+|   <body>
+|     "X"
+
+#data
+<noframes><!--<noframes></noframes>--></noframes>
+#errors
+Line: 1 Col: 10 Unexpected start tag (noframes). Expected DOCTYPE.
+Line: 1 Col: 49 Unexpected end tag (noframes).
+#document
+| <html>
+|   <head>
+|     <noframes>
+|       "<!--<noframes>"
+|   <body>
+|     "-->"
+
+#data
+<noframes><body><script><!--...</script></body></noframes></html>
+#errors
+Line: 1 Col: 10 Unexpected start tag (noframes). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <noframes>
+|       "<body><script><!--...</script></body>"
+|   <body>
+
+#data
+<textarea><!--<textarea></textarea>--></textarea>
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+Line: 1 Col: 49 Unexpected end tag (textarea).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "<!--<textarea>"
+|     "-->"
+
+#data
+<textarea>&lt;/textarea></textarea>
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "</textarea>"
+
+#data
+<iframe><!--<iframe></iframe>--></iframe>
+#errors
+Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+Line: 1 Col: 41 Unexpected end tag (iframe).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "<!--<iframe>"
+|     "-->"
+
+#data
+<iframe>...<!--X->...<!--/X->...</iframe>
+#errors
+Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       "...<!--X->...<!--/X->..."
+
+#data
+<xmp><!--<xmp></xmp>--></xmp>
+#errors
+Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
+Line: 1 Col: 29 Unexpected end tag (xmp).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       "<!--<xmp>"
+|     "-->"
+
+#data
+<noembed><!--<noembed></noembed>--></noembed>
+#errors
+Line: 1 Col: 9 Unexpected start tag (noembed). Expected DOCTYPE.
+Line: 1 Col: 45 Unexpected end tag (noembed).
+#document
+| <html>
+|   <head>
+|   <body>
+|     <noembed>
+|       "<!--<noembed>"
+|     "-->"
+
+#data
+<!doctype html><table>
+
+#errors
+Line 2 Col 0 Unexpected end of file. Expected table content.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "
+"
+
+#data
+<!doctype html><table><td><span><font></span><span>
+#errors
+Line 1 Col 26 Unexpected table cell start tag (td) in the table body phase.
+Line 1 Col 45 Unexpected end tag (span).
+Line 1 Col 51 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <span>
+|               <font>
+|             <font>
+|               <span>
+
+#data
+<!doctype html><form><table></form><form></table></form>
+#errors
+35: Stray end tag “form”.
+41: Start tag “form” seen in “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <table>
+|         <form>
diff --git a/html5lib/tests/testdata/tree-construction/tests17.dat b/html5lib/tests/testdata/tree-construction/tests17.dat
new file mode 100644
index 00000000..7b555f88
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests17.dat
@@ -0,0 +1,153 @@
+#data
+<!doctype html><table><tbody><select><tr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><tr><select><td>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<!doctype html><table><tr><td><select><td>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|           <td>
+
+#data
+<!doctype html><table><tr><th><select><td>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <th>
+|             <select>
+|           <td>
+
+#data
+<!doctype html><table><caption><select><tr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <select>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><select><tr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><td>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><th>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><tbody>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><thead>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><tfoot>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><select><caption>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><table><tr></table>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|     "a"
diff --git a/html5lib/tests/testdata/tree-construction/tests18.dat b/html5lib/tests/testdata/tree-construction/tests18.dat
new file mode 100644
index 00000000..680e1f06
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests18.dat
@@ -0,0 +1,269 @@
+#data
+<!doctype html><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><table><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+
+#data
+<!doctype html><table><tbody><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <tbody>
+
+#data
+<!doctype html><table><tbody><tr><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><tbody><tr><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><td><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <plaintext>
+|               "</plaintext>"
+
+#data
+<!doctype html><table><caption><plaintext></plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <plaintext>
+|           "</plaintext>"
+
+#data
+<!doctype html><table><tr><style></script></style>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <style>
+|             "</script>"
+
+#data
+<!doctype html><table><tr><script></style></script>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <script>
+|             "</style>"
+
+#data
+<!doctype html><table><caption><style></script></style>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <style>
+|           "</script>"
+|         "abc"
+
+#data
+<!doctype html><table><td><style></script></style>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <style>
+|               "</script>"
+|             "abc"
+
+#data
+<!doctype html><select><script></style></script>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+
+#data
+<!doctype html><table><select><script></style></script>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+|     <table>
+
+#data
+<!doctype html><table><tr><select><script></style></script>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <script>
+|         "</style>"
+|       "abc"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><frameset></frameset><noframes>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+
+#data
+<!doctype html><frameset></frameset><noframes>abc</noframes><!--abc-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+|   <!-- abc -->
+
+#data
+<!doctype html><frameset></frameset></html><noframes>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+
+#data
+<!doctype html><frameset></frameset></html><noframes>abc</noframes><!--abc-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   <noframes>
+|     "abc"
+| <!-- abc -->
+
+#data
+<!doctype html><table><tr></tbody><tfoot>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|       <tfoot>
+
+#data
+<!doctype html><table><td><svg></svg>abc<td>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|             "abc"
+|           <td>
diff --git a/html5lib/tests/testdata/tree-construction/tests19.dat b/html5lib/tests/testdata/tree-construction/tests19.dat
new file mode 100644
index 00000000..0d62f5a5
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests19.dat
@@ -0,0 +1,1237 @@
+#data
+<!doctype html><math><mn DefinitionUrl="foo">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mn>
+|         definitionURL="foo"
+
+#data
+<!doctype html><html><!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <!-- foo -->
+|   <head>
+|   <body>
+
+#data
+<!doctype html><head></head><!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <!-- foo -->
+|   <body>
+
+#data
+<!doctype html><body><pre>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <pre>
+
+#data
+<!doctype html><body><listing>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <listing>
+
+#data
+<!doctype html><plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <plaintext>
+
+#data
+<!doctype html><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <h1>
+
+#data
+<!doctype html><form><isindex>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+
+#data
+<!doctype html><isindex action="POST">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       action="POST"
+|       <hr>
+|       <label>
+|         "This is a searchable index. Enter search keywords: "
+|         <input>
+|           name="isindex"
+|       <hr>
+
+#data
+<!doctype html><isindex prompt="this is isindex">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <hr>
+|       <label>
+|         "this is isindex"
+|         <input>
+|           name="isindex"
+|       <hr>
+
+#data
+<!doctype html><isindex type="hidden">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <hr>
+|       <label>
+|         "This is a searchable index. Enter search keywords: "
+|         <input>
+|           name="isindex"
+|           type="hidden"
+|       <hr>
+
+#data
+<!doctype html><isindex name="foo">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <hr>
+|       <label>
+|         "This is a searchable index. Enter search keywords: "
+|         <input>
+|           name="isindex"
+|       <hr>
+
+#data
+<!doctype html><ruby><rp>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       
+|       <rp>
+
+#data
+<!doctype html><ruby><div><span><rp>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <span>
+|           <rp>
+
+#data
+<!doctype html><ruby><div><rp>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         
+|         <rp>
+
+#data
+<!doctype html><ruby><rt>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       
+|       <rt>
+
+#data
+<!doctype html><ruby><div><span><rt>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <span>
+|           <rt>
+
+#data
+<!doctype html><ruby><div><rt>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         
+|         <rt>
+
+#data
+<!doctype html><math/><foo>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|     <foo>
+
+#data
+<!doctype html><svg/><foo>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <foo>
+
+#data
+<!doctype html><div></body><!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|   <!-- foo -->
+
+#data
+<!doctype html><h1><div><h3><span></h1>foo
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <h1>
+|       <div>
+|         <h3>
+|           <span>
+|         "foo"
+
+#data
+<!doctype html></h3>foo
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       "foo"
+
+#data
+<!doctype html><h3><li>abc</h2>foo
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <h3>
+|       <li>
+|         "abc"
+|     "foo"
+
+#data
+<!doctype html><table>abc<!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "abc"
+|     <table>
+|       <!-- foo -->
+
+#data
+<!doctype html><table>  <!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <!-- foo -->
+
+#data
+<!doctype html><table> b <!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     " b "
+|     <table>
+|       <!-- foo -->
+
+#data
+<!doctype html><select><option><option>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <option>
+
+#data
+<!doctype html><select><option></optgroup>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!doctype html><select><option></optgroup>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!doctype html><math><mi><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math mi>
+|           
+|           <h1>
+
+#data
+<!doctype html><math><mo><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math mo>
+|           
+|           <h1>
+
+#data
+<!doctype html><math><mn><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math mn>
+|           
+|           <h1>
+
+#data
+<!doctype html><math><ms><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math ms>
+|           
+|           <h1>
+
+#data
+<!doctype html><math><mtext><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math mtext>
+|           
+|           <h1>
+
+#data
+<!doctype html><frameset></noframes>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html c=d><body></html><html a=b>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <body>
+
+#data
+<!doctype html><html c=d><frameset></frameset></html><html a=b>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html><!--foo-->
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+| <!-- foo -->
+
+#data
+<!doctype html><html><frameset></frameset></html>  
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|   "  "
+
+#data
+<!doctype html><html><frameset></frameset></html>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><html><frameset></frameset></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<html><frameset></frameset></html><!doctype html>
+#errors
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><body><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html><frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html>a<frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       "a"
+
+#data
+<!doctype html> <frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><pre><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+
+#data
+<!doctype html><listing><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <listing>
+
+#data
+<!doctype html><li><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <li>
+
+#data
+<!doctype html><dd><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+
+#data
+<!doctype html><dt><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dt>
+
+#data
+<!doctype html><button><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <button>
+
+#data
+<!doctype html><applet><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <applet>
+
+#data
+<!doctype html><marquee><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <marquee>
+
+#data
+<!doctype html><object><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <object>
+
+#data
+<!doctype html><table><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+
+#data
+<!doctype html><area><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <area>
+
+#data
+<!doctype html><basefont><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <basefont>
+|   <frameset>
+
+#data
+<!doctype html><bgsound><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <bgsound>
+|   <frameset>
+
+#data
+<!doctype html><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+
+#data
+<!doctype html><embed><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <embed>
+
+#data
+<!doctype html><img><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+
+#data
+<!doctype html><input><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+
+#data
+<!doctype html><keygen><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <keygen>
+
+#data
+<!doctype html><wbr><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+
+#data
+<!doctype html><hr><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <hr>
+
+#data
+<!doctype html><textarea></textarea><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+
+#data
+<!doctype html><xmp></xmp><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+
+#data
+<!doctype html><iframe></iframe><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+
+#data
+<!doctype html><select></select><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!doctype html><svg></svg><frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><math></math><frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><svg><foreignObject><div> <frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<!doctype html><svg>a</svg><frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "a"
+
+#data
+<!doctype html><svg> </svg><frameset><frame>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+|     <frame>
+
+#data
+<html>aaa<frameset></frameset>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "aaa"
+
+#data
+<html> a <frameset></frameset>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "a "
+
+#data
+<!doctype html><div><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><div><body><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<!doctype html><p><math>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|     "a"
+
+#data
+<!doctype html><math><mn><span>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <math math>
+|         <math mn>
+|           <span>
+|             
+|             "a"
+
+#data
+<!doctype html><math></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!doctype html><meta charset="ascii">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|       charset="ascii"
+|   <body>
+
+#data
+<!doctype html><meta http-equiv="content-type" content="text/html;charset=ascii">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|       content="text/html;charset=ascii"
+|       http-equiv="content-type"
+|   <body>
+
+#data
+<!doctype html><head><!--aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa--><meta charset="utf8">
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <!-- aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -->
+|     <meta>
+|       charset="utf8"
+|   <body>
+
+#data
+<!doctype html><html a=b><head></head><html c=d>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   a="b"
+|   c="d"
+|   <head>
+|   <body>
+
+#data
+<!doctype html><image/>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+
+#data
+<!doctype html>a<i>b<table>c<b>d</i>e</b>f
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "a"
+|     <i>
+|       "bc"
+|       <b>
+|         "de"
+|       "f"
+|       <table>
+
+#data
+<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+|     <table>
+
+#data
+<!doctype html><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+
+#data
+<!doctype html><table><i>a<b>b<div>c</i>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|       <div>
+|         <i>
+|           "c"
+|     <table>
+
+#data
+<!doctype html><table><i>a<b>b<div>c<a>d</i>e</b>f
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <b>
+|         "b"
+|     <b>
+|     <div>
+|       <b>
+|         <i>
+|           "c"
+|           <a>
+|             "d"
+|         <a>
+|           "e"
+|       <a>
+|         "f"
+|     <table>
+
+#data
+<!doctype html><table><i>a<div>b<tr>c<b>d</i>e
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <i>
+|       "a"
+|       <div>
+|         "b"
+|     <i>
+|       "c"
+|       <b>
+|         "d"
+|     <b>
+|       "e"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><table><td><table><i>a<div>b<b>c</i>d
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <i>
+|               "a"
+|             <div>
+|               <i>
+|                 "b"
+|                 <b>
+|                   "c"
+|               <b>
+|                 "d"
+|             <table>
+
+#data
+<!doctype html><body><bgsound>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <bgsound>
+
+#data
+<!doctype html><body><basefont>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <basefont>
+
+#data
+<!doctype html><a><b></a><basefont>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <basefont>
+
+#data
+<!doctype html><a><b></a><bgsound>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <bgsound>
+
+#data
+<!doctype html><figcaption><article></figcaption>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <figcaption>
+|       <article>
+|     "a"
+
+#data
+<!doctype html><summary><article></summary>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <summary>
+|       <article>
+|     "a"
+
+#data
+<!doctype html><a><plaintext>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <a>
+|     <plaintext>
+|       <a>
+|         "b"
+
+#data
+<!DOCTYPE html><div>a<a></div>bcd
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "a"
+|       <a>
+|     <a>
+|       "b"
+|       
+|         "c"
+|       "d"
diff --git a/html5lib/tests/testdata/tree-construction/tests2.dat b/html5lib/tests/testdata/tree-construction/tests2.dat
new file mode 100644
index 00000000..60d85922
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests2.dat
@@ -0,0 +1,763 @@
+#data
+<!DOCTYPE html>Test
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+<textarea>test</div>test
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+Line: 1 Col: 24 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "test</div>test"
+
+#data
+<table><td>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 11 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><td>test</tbody></table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected table cell start tag (td) in the table body phase.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "test"
+
+#data
+<frame>test
+#errors
+Line: 1 Col: 7 Unexpected start tag (frame). Expected DOCTYPE.
+Line: 1 Col: 7 Unexpected start tag frame. Ignored.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "test"
+
+#data
+<!DOCTYPE html><frameset>test
+#errors
+Line: 1 Col: 29 Unepxected characters in the frameset phase. Characters ignored.
+Line: 1 Col: 29 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset><!DOCTYPE html>
+#errors
+Line: 1 Col: 40 Unexpected DOCTYPE. Ignored.
+Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><font><b>test</font>
+#errors
+Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 38 End tag (font) violates step 1, paragraph 3 of the adoption agency algorithm.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|     
+|       <font>
+|         <b>
+|           "test"
+
+#data
+<!DOCTYPE html><dt><div><dd>
+#errors
+Line: 1 Col: 28 Missing end tag (div, dt).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <dt>
+|       <div>
+|     <dd>
+
+#data
+<script></x
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+Line: 1 Col: 11 Unexpected end of file. Expected end tag (script).
+#document
+| <html>
+|   <head>
+|     <script>
+|       "</x"
+|   <body>
+
+#data
+<table><plaintext><td>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 18 Unexpected start tag (plaintext) in table context caused voodoo mode.
+Line: 1 Col: 22 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "<td>"
+|     <table>
+
+#data
+<plaintext></plaintext>
+#errors
+Line: 1 Col: 11 Unexpected start tag (plaintext). Expected DOCTYPE.
+Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!DOCTYPE html><table><tr>TEST
+#errors
+Line: 1 Col: 30 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 30 Unexpected end of file. Expected table content.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "TEST"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body t1=1><body t2=2><body t3=3 t4=4>
+#errors
+Line: 1 Col: 37 Unexpected start tag (body).
+Line: 1 Col: 53 Unexpected start tag (body).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     t1="1"
+|     t2="2"
+|     t3="3"
+|     t4="4"
+
+#data
+</b test
+#errors
+Line: 1 Col: 8 Unexpected end of file in attribute name.
+Line: 1 Col: 8 End tag contains unexpected attributes.
+Line: 1 Col: 8 Unexpected end tag (b). Expected DOCTYPE.
+Line: 1 Col: 8 Unexpected end tag (b) after the (implied) root element.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html></b test<b &=&amp>X
+#errors
+Line: 1 Col: 32 Named entity didn't end with ';'.
+Line: 1 Col: 33 End tag contains unexpected attributes.
+Line: 1 Col: 33 Unexpected end tag (b) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+
+#data
+<!doctypehtml><scrIPt type=text/x-foobar;baz>X</SCRipt
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+Line: 1 Col: 54 Unexpected end of file in the tag name.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       type="text/x-foobar;baz"
+|       "X</SCRipt"
+|   <body>
+
+#data
+&
+#errors
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&"
+
+#data
+&#
+#errors
+Line: 1 Col: 1 Numeric entity expected. Got end of file instead.
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#"
+
+#data
+&#X
+#errors
+Line: 1 Col: 3 Numeric entity expected but none found.
+Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#X"
+
+#data
+&#x
+#errors
+Line: 1 Col: 3 Numeric entity expected but none found.
+Line: 1 Col: 3 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&#x"
+
+#data
+&#45
+#errors
+Line: 1 Col: 4 Numeric entity didn't end with ';'.
+Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "-"
+
+#data
+&x-test
+#errors
+Line: 1 Col: 1 Named entity expected. Got none.
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&x-test"
+
+#data
+<!doctypehtml><li>
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <li>
+
+#data
+<!doctypehtml><dt>
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <dt>
+
+#data
+<!doctypehtml><dd>
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <dd>
+
+#data
+<!doctypehtml><form>
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <form>
+
+#data
+<!DOCTYPE html>X
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     "X"
+
+#data
+&AMP
+#errors
+Line: 1 Col: 4 Named entity didn't end with ';'.
+Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&"
+
+#data
+&AMp;
+#errors
+Line: 1 Col: 1 Named entity expected. Got none.
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "&AMp;"
+
+#data
+<!DOCTYPE html><html><head></head><body><thisISasillyTESTelementNameToMakeSureCrazyTagNamesArePARSEDcorrectLY>
+#errors
+Line: 1 Col: 110 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <thisisasillytestelementnametomakesurecrazytagnamesareparsedcorrectly>
+
+#data
+<!DOCTYPE html>X</body>X
+#errors
+Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "XX"
+
+#data
+<!DOCTYPE html><!-- X
+#errors
+Line: 1 Col: 21 Unexpected end of file in comment.
+#document
+| <!DOCTYPE html>
+| <!--  X -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><table><caption>test TEST</caption><td>test
+#errors
+Line: 1 Col: 54 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         "test TEST"
+|       <tbody>
+|         <tr>
+|           <td>
+|             "test"
+
+#data
+<!DOCTYPE html><select><option><optgroup>
+#errors
+Line: 1 Col: 41 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|       <optgroup>
+
+#data
+<!DOCTYPE html><select><optgroup><option></optgroup><option><select><option>
+#errors
+Line: 1 Col: 68 Unexpected select start tag in the select phase treated as select end tag.
+Line: 1 Col: 76 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+|         <option>
+|       <option>
+|     <option>
+
+#data
+<!DOCTYPE html><select><optgroup><option><optgroup>
+#errors
+Line: 1 Col: 51 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+|         <option>
+|       <optgroup>
+
+#data
+<!DOCTYPE html><datalist><option>foo</datalist>bar
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <datalist>
+|       <option>
+|         "foo"
+|     "bar"
+
+#data
+<!DOCTYPE html><font><input><input></font>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       <input>
+|       <input>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX -->
+#errors
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX
+#errors
+Line: 1 Col: 29 Unexpected end of file in comment (-)
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><!-- XXX - XXX - XXX -->
+#errors
+#document
+| <!DOCTYPE html>
+| <!--  XXX - XXX - XXX  -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<isindex test=x name=x>
+#errors
+Line: 1 Col: 23 Unexpected start tag (isindex). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected start tag isindex. Don't use it!
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <hr>
+|       <label>
+|         "This is a searchable index. Enter search keywords: "
+|         <input>
+|           name="isindex"
+|           test="x"
+|       <hr>
+
+#data
+test
+test
+#errors
+Line: 2 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "test
+test"
+
+#data
+<!DOCTYPE html><body><title>test</body></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "test</body>"
+
+#data
+<!DOCTYPE html><body><title>X</title><meta name=z><link rel=foo><style>
+x { content:"</style" } </style>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+|     <meta>
+|       name="z"
+|     <link>
+|       rel="foo"
+|     <style>
+|       "
+x { content:"</style" } "
+
+#data
+<!DOCTYPE html><select><optgroup></optgroup></select>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <optgroup>
+
+#data
+ 
+ 
+#errors
+Line: 2 Col: 1 Unexpected End of file. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>  <html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><script>
+</script>  <title>x</title>  </head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <script>
+|       "
+"
+|     "  "
+|     <title>
+|       "x"
+|     "  "
+|   <body>
+
+#data
+<!DOCTYPE html><html><body><html id=x>
+#errors
+Line: 1 Col: 38 html needs to be the first start tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>X</body><html id="x">
+#errors
+Line: 1 Col: 36 Unexpected start tag token (html) in the after body phase.
+Line: 1 Col: 36 html needs to be the first start tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+|     "X"
+
+#data
+<!DOCTYPE html><head><html id=x>
+#errors
+Line: 1 Col: 32 html needs to be the first start tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   id="x"
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html>X</html>X
+#errors
+Line: 1 Col: 24 Unexpected non-space characters in the after body phase.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "XX"
+
+#data
+<!DOCTYPE html>X</html> 
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X "
+
+#data
+<!DOCTYPE html>X</html>X
+#errors
+Line: 1 Col: 26 Unexpected start tag (p).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     
+|       "X"
+
+#data
+<!DOCTYPE html>X
+#errors
+Line: 1 Col: 19 Expected a > after the /.
+Line: 1 Col: 21 Solidus (/) incorrectly placed in tag.
+Line: 1 Col: 23 Solidus (/) incorrectly placed in tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     
+|       x=""
+|       y=""
+|       z=""
+
+#data
+<!DOCTYPE html><!--x--
+#errors
+Line: 1 Col: 22 Unexpected end of file in comment (--).
+#document
+| <!DOCTYPE html>
+| <!-- x -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE html><table><tr><td></table>
+#errors
+Line: 1 Col: 34 Unexpected end tag (p). Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             
+
+#data
+<!DOCTYPE <!DOCTYPE HTML>><!--<!--x-->-->
+#errors
+Line: 1 Col: 20 Expected space or '>'. Got ''
+Line: 1 Col: 25 Erroneous DOCTYPE.
+Line: 1 Col: 35 Unexpected character in comment found.
+#document
+| <!DOCTYPE <!doctype>
+| <html>
+|   <head>
+|   <body>
+|     ">"
+|     <!-- <!--x -->
+|     "-->"
+
+#data
+<!doctype html><div><form></form><div></div></div>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <form>
+|       <div>
diff --git a/html5lib/tests/testdata/tree-construction/tests20.dat b/html5lib/tests/testdata/tree-construction/tests20.dat
new file mode 100644
index 00000000..6bd82560
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests20.dat
@@ -0,0 +1,455 @@
+#data
+<!doctype html><button><button>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|       <button>
+
+#data
+<!doctype html><button><address>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <address>
+
+#data
+<!doctype html><button><blockquote>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <blockquote>
+
+#data
+<!doctype html><button><menu>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <menu>
+
+#data
+<!doctype html><button>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         
+
+#data
+<!doctype html><button><ul>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <ul>
+
+#data
+<!doctype html><button><h1>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <h1>
+
+#data
+<!doctype html><button><h6>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <h6>
+
+#data
+<!doctype html><button><listing>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <listing>
+
+#data
+<!doctype html><button><pre>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <pre>
+
+#data
+<!doctype html><button><form>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <form>
+
+#data
+<!doctype html><button><li>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <li>
+
+#data
+<!doctype html><button><dd>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <dd>
+
+#data
+<!doctype html><button><dt>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <dt>
+
+#data
+<!doctype html><button><plaintext>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <plaintext>
+
+#data
+<!doctype html><button><table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <table>
+
+#data
+<!doctype html><button><hr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <hr>
+
+#data
+<!doctype html><button><xmp>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         <xmp>
+
+#data
+<!doctype html><button>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <button>
+|         
+
+#data
+<!doctype html><address><button></address>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <address>
+|       <button>
+|     "a"
+
+#data
+<!doctype html><address><button></address>a
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <address>
+|       <button>
+|     "a"
+
+#data
+<table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       
+|       <table>
+
+#data
+<!doctype html><svg>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<!doctype html><figcaption>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <figcaption>
+
+#data
+<!doctype html><summary>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <summary>
+
+#data
+<!doctype html><form><table><form>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <table>
+
+#data
+<!doctype html><table><form><form>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <form>
+
+#data
+<!doctype html><table><form></table><form>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <form>
+
+#data
+<!doctype html><svg><foreignObject>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         
+
+#data
+<!doctype html><svg><title>abc
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         "abc"
+
+#data
+<option><span><option>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <option>
+|       <span>
+|         <option>
+
+#data
+<option><option>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <option>
+|     <option>
+
+#data
+<math><annotation-xml><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|     <div>
+
+#data
+<math><annotation-xml encoding="application/svg+xml"><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="application/svg+xml"
+|     <div>
+
+#data
+<math><annotation-xml encoding="application/xhtml+xml"><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="application/xhtml+xml"
+|         <div>
+
+#data
+<math><annotation-xml encoding="aPPlication/xhtmL+xMl"><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="aPPlication/xhtmL+xMl"
+|         <div>
+
+#data
+<math><annotation-xml encoding="text/html"><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="text/html"
+|         <div>
+
+#data
+<math><annotation-xml encoding="Text/htmL"><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding="Text/htmL"
+|         <div>
+
+#data
+<math><annotation-xml encoding=" text/html "><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         encoding=" text/html "
+|     <div>
diff --git a/html5lib/tests/testdata/tree-construction/tests21.dat b/html5lib/tests/testdata/tree-construction/tests21.dat
new file mode 100644
index 00000000..1260ec03
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests21.dat
@@ -0,0 +1,221 @@
+#data
+<svg><![CDATA[foo]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<math><![CDATA[foo]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       "foo"
+
+#data
+<div><![CDATA[foo]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <!-- [CDATA[foo]] -->
+
+#data
+<svg><![CDATA[foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<svg><![CDATA[foo
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "foo"
+
+#data
+<svg><![CDATA[
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<svg><![CDATA[]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+
+#data
+<svg><![CDATA[]] >]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]] >"
+
+#data
+<svg><![CDATA[]] >]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]] >"
+
+#data
+<svg><![CDATA[]]
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]]"
+
+#data
+<svg><![CDATA[]
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]"
+
+#data
+<svg><![CDATA[]>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "]>a"
+
+#data
+<svg><foreignObject><div><![CDATA[foo]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         <div>
+|           <!-- [CDATA[foo]] -->
+
+#data
+<svg><![CDATA[<svg>]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+
+#data
+<svg><![CDATA[</svg>a]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "</svg>a"
+
+#data
+<svg><![CDATA[<svg>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>a"
+
+#data
+<svg><![CDATA[</svg>a
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "</svg>a"
+
+#data
+<svg><![CDATA[<svg>]]><path>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+|       <svg path>
+
+#data
+<svg><![CDATA[<svg>]]></path>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+
+#data
+<svg><![CDATA[<svg>]]><!--path-->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>"
+|       <!-- path -->
+
+#data
+<svg><![CDATA[<svg>]]>path
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<svg>path"
+
+#data
+<svg><![CDATA[<!--svg-->]]>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       "<!--svg-->"
diff --git a/html5lib/tests/testdata/tree-construction/tests22.dat b/html5lib/tests/testdata/tree-construction/tests22.dat
new file mode 100644
index 00000000..aab27b2e
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests22.dat
@@ -0,0 +1,157 @@
+#data
+<a><b><big><em><strong><div>X</a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|         <big>
+|           <em>
+|             <strong>
+|     <big>
+|       <em>
+|         <strong>
+|           <div>
+|             <a>
+|               "X"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8>A</a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         "A"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9>A</a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         <div>
+|                           id="9"
+|                           "A"
+
+#data
+<a><b><div id=1><div id=2><div id=3><div id=4><div id=5><div id=6><div id=7><div id=8><div id=9><div id=10>A</a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       <b>
+|     <b>
+|       <div>
+|         id="1"
+|         <a>
+|         <div>
+|           id="2"
+|           <a>
+|           <div>
+|             id="3"
+|             <a>
+|             <div>
+|               id="4"
+|               <a>
+|               <div>
+|                 id="5"
+|                 <a>
+|                 <div>
+|                   id="6"
+|                   <a>
+|                   <div>
+|                     id="7"
+|                     <a>
+|                     <div>
+|                       id="8"
+|                       <a>
+|                         <div>
+|                           id="9"
+|                           <div>
+|                             id="10"
+|                             "A"
+
+#data
+<cite><b><cite><i><cite><i><cite><i><div>X</b>TEST
+#errors
+Line: 1 Col: 6 Unexpected start tag (cite). Expected DOCTYPE.
+Line: 1 Col: 46 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 50 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <cite>
+|       <b>
+|         <cite>
+|           <i>
+|             <cite>
+|               <i>
+|                 <cite>
+|                   <i>
+|       <i>
+|         <i>
+|           <div>
+|             <b>
+|               "X"
+|             "TEST"
diff --git a/html5lib/tests/testdata/tree-construction/tests23.dat b/html5lib/tests/testdata/tree-construction/tests23.dat
new file mode 100644
index 00000000..34d2a73f
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests23.dat
@@ -0,0 +1,155 @@
+#data
+<font size=4><font color=red><font size=4><font size=4><font size=4><font size=4><font size=4><font color=red>X
+#errors
+3: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+116: Unclosed elements.
+117: End of file seen and there were open elements.
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           color="red"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               <font>
+|                 size="4"
+|                 <font>
+|                   size="4"
+|                   <font>
+|                     size="4"
+|                     <font>
+|                       color="red"
+|     
+|       <font>
+|         color="red"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               <font>
+|                 color="red"
+|                 "X"
+
+#data
+<font size=4><font size=4><font size=4><font size=4>X
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             "X"
+
+#data
+<font size=4><font size=4><font size=4><font size="5"><font size=4>X
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="5"
+|               <font>
+|                 size="4"
+|     
+|       <font>
+|         size="4"
+|         <font>
+|           size="4"
+|           <font>
+|             size="5"
+|             <font>
+|               size="4"
+|               "X"
+
+#data
+<font size=4 id=a><font size=4 id=b><font size=4><font size=4>X
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <font>
+|         id="a"
+|         size="4"
+|         <font>
+|           id="b"
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|     
+|       <font>
+|         id="a"
+|         size="4"
+|         <font>
+|           id="b"
+|           size="4"
+|           <font>
+|             size="4"
+|             <font>
+|               size="4"
+|               "X"
+
+#data
+<b id=a><b id=a><b id=a><b><object><b id=a><b id=a>X</object>Y
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <b>
+|         id="a"
+|         <b>
+|           id="a"
+|           <b>
+|             id="a"
+|             <b>
+|               <object>
+|                 <b>
+|                   id="a"
+|                   <b>
+|                     id="a"
+|                     "X"
+|     
+|       <b>
+|         id="a"
+|         <b>
+|           id="a"
+|           <b>
+|             id="a"
+|             <b>
+|               "Y"
diff --git a/html5lib/tests/testdata/tree-construction/tests24.dat b/html5lib/tests/testdata/tree-construction/tests24.dat
new file mode 100644
index 00000000..f6dc7eb4
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests24.dat
@@ -0,0 +1,79 @@
+#data
+<!DOCTYPE html>&NotEqualTilde;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "≂̸"
+
+#data
+<!DOCTYPE html>&NotEqualTilde;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "≂̸A"
+
+#data
+<!DOCTYPE html>&ThickSpace;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "  "
+
+#data
+<!DOCTYPE html>&ThickSpace;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "  A"
+
+#data
+<!DOCTYPE html>&NotSubset;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "⊂⃒"
+
+#data
+<!DOCTYPE html>&NotSubset;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "⊂⃒A"
+
+#data
+<!DOCTYPE html>&Gopf;
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "𝔾"
+
+#data
+<!DOCTYPE html>&Gopf;A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "𝔾A"
diff --git a/html5lib/tests/testdata/tree-construction/tests25.dat b/html5lib/tests/testdata/tree-construction/tests25.dat
new file mode 100644
index 00000000..00de7295
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests25.dat
@@ -0,0 +1,219 @@
+#data
+<!DOCTYPE html><body><foo>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       "A"
+
+#data
+<!DOCTYPE html><body><area>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <area>
+|     "A"
+
+#data
+<!DOCTYPE html><body><base>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <base>
+|     "A"
+
+#data
+<!DOCTYPE html><body><basefont>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <basefont>
+|     "A"
+
+#data
+<!DOCTYPE html><body><bgsound>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <bgsound>
+|     "A"
+
+#data
+<!DOCTYPE html><body>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     "A"
+
+#data
+<!DOCTYPE html><body><col>A
+#errors
+26: Stray start tag “col”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body><command>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <command>
+|     "A"
+
+#data
+<!DOCTYPE html><body><embed>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <embed>
+|     "A"
+
+#data
+<!DOCTYPE html><body><frame>A
+#errors
+26: Stray start tag “frame”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "A"
+
+#data
+<!DOCTYPE html><body><hr>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <hr>
+|     "A"
+
+#data
+<!DOCTYPE html><body><img>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <img>
+|     "A"
+
+#data
+<!DOCTYPE html><body><input>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|     "A"
+
+#data
+<!DOCTYPE html><body><keygen>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <keygen>
+|     "A"
+
+#data
+<!DOCTYPE html><body><link>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <link>
+|     "A"
+
+#data
+<!DOCTYPE html><body><meta>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     "A"
+
+#data
+<!DOCTYPE html><body><param>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <param>
+|     "A"
+
+#data
+<!DOCTYPE html><body><source>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <source>
+|     "A"
+
+#data
+<!DOCTYPE html><body><track>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <track>
+|     "A"
+
+#data
+<!DOCTYPE html><body><wbr>A
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <wbr>
+|     "A"
diff --git a/html5lib/tests/testdata/tree-construction/tests26.dat b/html5lib/tests/testdata/tree-construction/tests26.dat
new file mode 100644
index 00000000..fae11ffd
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests26.dat
@@ -0,0 +1,313 @@
+#data
+<!DOCTYPE html><body><a href='#1'><nobr>1<nobr></a><a href='#2'><nobr>2<nobr></a><a href='#3'><nobr>3<nobr></a>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       href="#1"
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <nobr>
+|       
+|       <a>
+|         href="#2"
+|     <a>
+|       href="#2"
+|       <nobr>
+|         "2"
+|       <nobr>
+|     <nobr>
+|       
+|       <a>
+|         href="#3"
+|     <a>
+|       href="#3"
+|       <nobr>
+|         "3"
+|       <nobr>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr></b><i><nobr>2<nobr></i>3
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+|         "2"
+|       <nobr>
+|     <nobr>
+|       "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <nobr>
+|           <i>
+|         <i>
+|           <nobr>
+|             "2"
+|           <nobr>
+|         <nobr>
+|           "3"
+|         <table>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<table><tr><td><nobr></b><i><nobr>2<nobr></i>3
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <table>
+|           <tbody>
+|             <tr>
+|               <td>
+|                 <nobr>
+|                   <i>
+|                 <i>
+|                   <nobr>
+|                     "2"
+|                   <nobr>
+|                 <nobr>
+|                   "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<div><nobr></b><i><nobr>2<nobr></i>3
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|     <div>
+|       <b>
+|         <nobr>
+|         <nobr>
+|       <nobr>
+|         <i>
+|       <i>
+|         <nobr>
+|           "2"
+|         <nobr>
+|       <nobr>
+|         "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr></b><div><i><nobr>2<nobr></i>3
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|     <div>
+|       <nobr>
+|         <i>
+|       <i>
+|         <nobr>
+|           "2"
+|         <nobr>
+|       <nobr>
+|         "3"
+
+#data
+<!DOCTYPE html><body><b><nobr>1<nobr><ins></b><i><nobr>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|       <nobr>
+|         <ins>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+
+#data
+<!DOCTYPE html><body><b><nobr>1<ins><nobr></b><i>2
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       <nobr>
+|         "1"
+|         <ins>
+|       <nobr>
+|     <nobr>
+|       <i>
+|         "2"
+
+#data
+<!DOCTYPE html><body><b>1<nobr></b><i><nobr>2</i>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "1"
+|       <nobr>
+|     <nobr>
+|       <i>
+|     <i>
+|       <nobr>
+|         "2"
+
+#data
+<code x</code>
+
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <code>
+|         code=""
+|         x<=""
+|     <code>
+|       code=""
+|       x<=""
+|       "
+"
+
+#data
+<!DOCTYPE html><svg><foreignObject><i>a
+#errors
+45: End tag “p” seen, but there were open elements.
+41: Unclosed element “i”.
+46: End of file seen and there were open elements.
+35: Unclosed element “foreignObject”.
+20: Unclosed element “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg foreignObject>
+|         
+|           <i>
+|         <i>
+|           "a"
+
+#data
+<!DOCTYPE html><table><tr><td><svg><foreignObject><i>a
+#errors
+56: End tag “p” seen, but there were open elements.
+52: Unclosed element “i”.
+57: End of file seen and there were open elements.
+46: Unclosed element “foreignObject”.
+31: Unclosed element “svg”.
+22: Unclosed element “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <svg svg>
+|               <svg foreignObject>
+|                 
+|                   <i>
+|                 <i>
+|                   "a"
+
+#data
+<!DOCTYPE html><math><mtext><i>a
+#errors
+38: End tag “p” seen, but there were open elements.
+34: Unclosed element “i”.
+39: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mtext>
+|         
+|           <i>
+|         <i>
+|           "a"
+
+#data
+<!DOCTYPE html><table><tr><td><math><mtext><i>a
+#errors
+53: End tag “p” seen, but there were open elements.
+49: Unclosed element “i”.
+54: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mtext>
+|                 
+|                   <i>
+|                 <i>
+|                   "a"
+
+#data
+<!DOCTYPE html><body><div><!/div>a
+#errors
+29: Bogus comment.
+34: End of file seen and there were open elements.
+26: Unclosed element “div”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <!-- /div -->
+|       "a"
diff --git a/html5lib/tests/testdata/tree-construction/tests3.dat b/html5lib/tests/testdata/tree-construction/tests3.dat
new file mode 100644
index 00000000..38dc501b
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests3.dat
@@ -0,0 +1,305 @@
+#data
+<head></head><style></style>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected start tag (style) that can be in head. Moved.
+#document
+| <html>
+|   <head>
+|     <style>
+|   <body>
+
+#data
+<head></head><script></script>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 21 Unexpected start tag (script) that can be in head. Moved.
+#document
+| <html>
+|   <head>
+|     <script>
+|   <body>
+
+#data
+<head></head><!-- --><style></style><!-- --><script></script>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+Line: 1 Col: 28 Unexpected start tag (style) that can be in head. Moved.
+#document
+| <html>
+|   <head>
+|     <style>
+|     <script>
+|   <!--   -->
+|   <!--   -->
+|   <body>
+
+#data
+<head></head><!-- -->x<style></style><!-- --><script></script>
+#errors
+Line: 1 Col: 6 Unexpected start tag (head). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <!--   -->
+|   <body>
+|     "x"
+|     <style>
+|     <!--   -->
+|     <script>
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+foo</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+
+foo</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "
+foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>
+foo
+</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "foo
+"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x</pre><span>
+</span></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x"
+|     <span>
+|       "
+"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x
+y</pre></body></html>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x
+y"
+
+#data
+<!DOCTYPE html><html><head></head><body><pre>x<div>
+y</pre></body></html>
+#errors
+Line: 2 Col: 7 End tag (pre) seen too early. Expected other end tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "x"
+|       <div>
+|         "
+y"
+
+#data
+<!DOCTYPE html><pre>&#x0a;&#x0a;A</pre>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <pre>
+|       "
+A"
+
+#data
+<!DOCTYPE html><HTML><META><HEAD></HEAD></HTML>
+#errors
+Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <meta>
+|   <body>
+
+#data
+<!DOCTYPE html><HTML><HEAD><head></HEAD></HTML>
+#errors
+Line: 1 Col: 33 Unexpected start tag head in existing head. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<textarea>foo<span>bar</span><i>baz
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+Line: 1 Col: 35 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "foo<span>bar</span><i>baz"
+
+#data
+<title>foo<span>bar</em><i>baz
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+Line: 1 Col: 30 Unexpected end of file. Expected end tag (title).
+#document
+| <html>
+|   <head>
+|     <title>
+|       "foo<span>bar</em><i>baz"
+|   <body>
+
+#data
+<!DOCTYPE html><textarea>
+</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+
+#data
+<!DOCTYPE html><textarea>
+foo</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "foo"
+
+#data
+<!DOCTYPE html><textarea>
+
+foo</textarea>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       "
+foo"
+
+#data
+<!DOCTYPE html><html><head></head><body><ul><li><div><li></ul></body></html>
+#errors
+Line: 1 Col: 60 Missing end tag (div, li).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <div>
+|           
+|       <li>
+
+#data
+<!doctype html><nobr><nobr><nobr>
+#errors
+Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
+Line: 1 Col: 33 Unexpected start tag (nobr) implies end tag (nobr).
+Line: 1 Col: 33 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <nobr>
+|     <nobr>
+|     <nobr>
+
+#data
+<!doctype html><nobr><nobr></nobr><nobr>
+#errors
+Line: 1 Col: 27 Unexpected start tag (nobr) implies end tag (nobr).
+Line: 1 Col: 40 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <nobr>
+|     <nobr>
+|     <nobr>
+
+#data
+<!doctype html><html><body><table></table></body></html>
+#errors
+Not known
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|     <table>
+
+#data
+<table></table>
+#errors
+Not known
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <table>
diff --git a/html5lib/tests/testdata/tree-construction/tests4.dat b/html5lib/tests/testdata/tree-construction/tests4.dat
new file mode 100644
index 00000000..3c506326
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests4.dat
@@ -0,0 +1,59 @@
+#data
+direct div content
+#errors
+#document-fragment
+div
+#document
+| "direct div content"
+
+#data
+direct textarea content
+#errors
+#document-fragment
+textarea
+#document
+| "direct textarea content"
+
+#data
+textarea content with <em>pseudo</em> <foo>markup
+#errors
+#document-fragment
+textarea
+#document
+| "textarea content with <em>pseudo</em> <foo>markup"
+
+#data
+this is &#x0043;DATA inside a <style> element
+#errors
+#document-fragment
+style
+#document
+| "this is &#x0043;DATA inside a <style> element"
+
+#data
+</plaintext>
+#errors
+#document-fragment
+plaintext
+#document
+| "</plaintext>"
+
+#data
+setting html's innerHTML
+#errors
+Line: 1 Col: 24 Unexpected EOF in inner html mode.
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   "setting html's innerHTML"
+
+#data
+<title>setting head's innerHTML</title>
+#errors
+#document-fragment
+head
+#document
+| <title>
+|   "setting head's innerHTML"
diff --git a/html5lib/tests/testdata/tree-construction/tests5.dat b/html5lib/tests/testdata/tree-construction/tests5.dat
new file mode 100644
index 00000000..d7b5128a
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests5.dat
@@ -0,0 +1,191 @@
+#data
+<style> <!-- </style>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end of file. Expected end tag (style).
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!-- "
+|   <body>
+|     "x"
+
+#data
+<style> <!-- </style> --> </style>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<style> <!--> </style>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!--> "
+|   <body>
+|     "x"
+
+#data
+<style> <!---> </style>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!---> "
+|   <body>
+|     "x"
+
+#data
+<iframe> <!---> </iframe>x
+#errors
+Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       " <!---> "
+|     "x"
+
+#data
+<iframe> <!--- </iframe>->x</iframe> --> </iframe>x
+#errors
+Line: 1 Col: 8 Unexpected start tag (iframe). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <iframe>
+|       " <!--- "
+|     "->x --> x"
+
+#data
+<script> <!-- </script> --> </script>x
+#errors
+Line: 1 Col: 8 Unexpected start tag (script). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <script>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<title> <!-- </title> --> </title>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|       " <!-- "
+|     " "
+|   <body>
+|     "--> x"
+
+#data
+<textarea> <!--- </textarea>->x</textarea> --> </textarea>x
+#errors
+Line: 1 Col: 10 Unexpected start tag (textarea). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <textarea>
+|       " <!--- "
+|     "->x --> x"
+
+#data
+<style> <!</-- </style>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (style). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <style>
+|       " <!</-- "
+|   <body>
+|     "x"
+
+#data
+<xmp></xmp>
+#errors
+XXX: Unknown
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|     <xmp>
+
+#data
+<xmp> <!-- > --> </xmp>
+#errors
+Line: 1 Col: 5 Unexpected start tag (xmp). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <xmp>
+|       " <!-- > --> "
+
+#data
+<title>&amp;</title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<title><!--&amp;--></title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--&-->"
+|   <body>
+
+#data
+<title><!--</title>
+#errors
+Line: 1 Col: 7 Unexpected start tag (title). Expected DOCTYPE.
+Line: 1 Col: 19 Unexpected end of file. Expected end tag (title).
+#document
+| <html>
+|   <head>
+|     <title>
+|       "<!--"
+|   <body>
+
+#data
+<noscript><!--</noscript>--></noscript>
+#errors
+Line: 1 Col: 10 Unexpected start tag (noscript). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|     <noscript>
+|       "<!--"
+|   <body>
+|     "-->"
diff --git a/html5lib/tests/testdata/tree-construction/tests6.dat b/html5lib/tests/testdata/tree-construction/tests6.dat
new file mode 100644
index 00000000..f28ece4f
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests6.dat
@@ -0,0 +1,663 @@
+#data
+<!doctype html></head> <head>
+#errors
+Line: 1 Col: 29 Unexpected start tag head. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   " "
+|   <body>
+
+#data
+<!doctype html><form><div></form><div>
+#errors
+33: End tag "form" seen but there were unclosed elements.
+38: End of file seen and there were open elements.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <form>
+|       <div>
+|         <div>
+
+#data
+<!doctype html><title>&amp;</title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "&"
+|   <body>
+
+#data
+<!doctype html><title><!--&amp;--></title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "<!--&-->"
+|   <body>
+
+#data
+<!doctype>
+#errors
+Line: 1 Col: 9 No space after literal string 'DOCTYPE'.
+Line: 1 Col: 10 Unexpected > character. Expected DOCTYPE name.
+Line: 1 Col: 10 Erroneous DOCTYPE.
+#document
+| <!DOCTYPE >
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!---x
+#errors
+Line: 1 Col: 6 Unexpected end of file in comment.
+Line: 1 Col: 6 Unexpected End of file. Expected DOCTYPE.
+#document
+| <!-- -x -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>
+<div>
+#errors
+Line: 1 Col: 6 Unexpected start tag (body).
+Line: 2 Col: 5 Expected closing tag. Unexpected end of file.
+#document-fragment
+div
+#document
+| "
+"
+| <div>
+
+#data
+<frameset></frameset>
+foo
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 2 Col: 3 Unexpected non-space characters in the after frameset phase. Ignored.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+<noframes>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 2 Col: 10 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+|   <noframes>
+
+#data
+<frameset></frameset>
+<div>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 2 Col: 5 Unexpected start tag (div) in the after frameset phase. Ignored.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+</html>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<frameset></frameset>
+</div>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 2 Col: 6 Unexpected end tag (div) in the after frameset phase. Ignored.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   "
+"
+
+#data
+<form><form>
+#errors
+Line: 1 Col: 6 Unexpected start tag (form). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected start tag (form).
+Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <form>
+
+#data
+<button><button>
+#errors
+Line: 1 Col: 8 Unexpected start tag (button). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected start tag (button) implies end tag (button).
+Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <button>
+|     <button>
+
+#data
+<table><tr><td></th>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (th). Ignored.
+Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><caption><td>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (td). Ignored.
+Line: 1 Col: 20 Unexpected table cell start tag (td) in the table body phase.
+Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><caption><div>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 21 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+</caption><div>
+#errors
+Line: 1 Col: 10 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
+#document-fragment
+caption
+#document
+| <div>
+
+#data
+<table><caption><div></caption>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 31 Unexpected end tag (caption). Missing end tag (div).
+Line: 1 Col: 31 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+<table><caption></table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 24 Unexpected end table tag in caption. Generates implied end caption.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+
+#data
+</table><div>
+#errors
+Line: 1 Col: 8 Unexpected end table tag in caption. Generates implied end caption.
+Line: 1 Col: 8 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 13 Expected closing tag. Unexpected end of file.
+#document-fragment
+caption
+#document
+| <div>
+
+#data
+<table><caption></body></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 23 Unexpected end tag (body). Ignored.
+Line: 1 Col: 29 Unexpected end tag (col). Ignored.
+Line: 1 Col: 40 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 47 Unexpected end tag (html). Ignored.
+Line: 1 Col: 55 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 60 Unexpected end tag (td). Ignored.
+Line: 1 Col: 68 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 73 Unexpected end tag (th). Ignored.
+Line: 1 Col: 81 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 86 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 86 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+
+#data
+<table><caption><div></div>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 27 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <div>
+
+#data
+<table><tr><td></body></caption></col></colgroup></html>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end tag (body). Ignored.
+Line: 1 Col: 32 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 38 Unexpected end tag (col). Ignored.
+Line: 1 Col: 49 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 56 Unexpected end tag (html). Ignored.
+Line: 1 Col: 56 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+</table></tbody></tfoot></thead></tr><div>
+#errors
+Line: 1 Col: 8 Unexpected end tag (table). Ignored.
+Line: 1 Col: 16 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 24 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 32 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 37 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 42 Expected closing tag. Unexpected end of file.
+#document-fragment
+td
+#document
+| <div>
+
+#data
+<table><colgroup>foo
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 20 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "foo"
+|     <table>
+|       <colgroup>
+
+#data
+foo<col>
+#errors
+Line: 1 Col: 3 Unexpected end tag (colgroup). Ignored.
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<table><colgroup></col>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 23 This element (col) has no end tag.
+Line: 1 Col: 23 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <colgroup>
+
+#data
+<frameset><div>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 1 Col: 15 Unexpected start tag token (div) in the frameset phase. Ignored.
+Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</frameset><frame>
+#errors
+Line: 1 Col: 11 Unexpected end tag token (frameset) in the frameset phase (innerHTML).
+#document-fragment
+frameset
+#document
+| <frame>
+
+#data
+<frameset></div>
+#errors
+Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected end tag token (div) in the frameset phase. Ignored.
+Line: 1 Col: 16 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</body><div>
+#errors
+Line: 1 Col: 7 Unexpected end tag (body). Ignored.
+Line: 1 Col: 12 Expected closing tag. Unexpected end of file.
+#document-fragment
+body
+#document
+| <div>
+
+#data
+<table><tr><div>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
+Line: 1 Col: 16 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+</tr><td>
+#errors
+Line: 1 Col: 5 Unexpected end tag (tr). Ignored.
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+</tbody></tfoot></thead><td>
+#errors
+Line: 1 Col: 8 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 16 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 24 Unexpected end tag (thead). Ignored.
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<table><tr><div><td>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 16 Unexpected start tag (div) in table context caused voodoo mode.
+Line: 1 Col: 20 Unexpected implied end tag (div) in the table row phase.
+Line: 1 Col: 20 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<caption><col><colgroup><tbody><tfoot><thead><tr>
+#errors
+Line: 1 Col: 9 Unexpected start tag (caption).
+Line: 1 Col: 14 Unexpected start tag (col).
+Line: 1 Col: 24 Unexpected start tag (colgroup).
+Line: 1 Col: 31 Unexpected start tag (tbody).
+Line: 1 Col: 38 Unexpected start tag (tfoot).
+Line: 1 Col: 45 Unexpected start tag (thead).
+Line: 1 Col: 49 Unexpected end of file. Expected table content.
+#document-fragment
+tbody
+#document
+| <tr>
+
+#data
+<table><tbody></thead>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 22 Unexpected end tag (thead) in the table body phase. Ignored.
+Line: 1 Col: 22 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+</table><tr>
+#errors
+Line: 1 Col: 8 Unexpected end tag (table). Ignored.
+Line: 1 Col: 12 Unexpected end of file. Expected table content.
+#document-fragment
+tbody
+#document
+| <tr>
+
+#data
+<table><tbody></body></caption></col></colgroup></html></td></th></tr>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 21 Unexpected end tag (body) in the table body phase. Ignored.
+Line: 1 Col: 31 Unexpected end tag (caption) in the table body phase. Ignored.
+Line: 1 Col: 37 Unexpected end tag (col) in the table body phase. Ignored.
+Line: 1 Col: 48 Unexpected end tag (colgroup) in the table body phase. Ignored.
+Line: 1 Col: 55 Unexpected end tag (html) in the table body phase. Ignored.
+Line: 1 Col: 60 Unexpected end tag (td) in the table body phase. Ignored.
+Line: 1 Col: 65 Unexpected end tag (th) in the table body phase. Ignored.
+Line: 1 Col: 70 Unexpected end tag (tr) in the table body phase. Ignored.
+Line: 1 Col: 70 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+<table><tbody></div>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 20 Unexpected end tag (div) in table context caused voodoo mode.
+Line: 1 Col: 20 End tag (div) seen too early. Expected other end tag.
+Line: 1 Col: 20 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+
+#data
+<table><table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected start tag (table) implies end tag (table).
+Line: 1 Col: 14 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|     <table>
+
+#data
+<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 14 Unexpected end tag (body). Ignored.
+Line: 1 Col: 24 Unexpected end tag (caption). Ignored.
+Line: 1 Col: 30 Unexpected end tag (col). Ignored.
+Line: 1 Col: 41 Unexpected end tag (colgroup). Ignored.
+Line: 1 Col: 48 Unexpected end tag (html). Ignored.
+Line: 1 Col: 56 Unexpected end tag (tbody). Ignored.
+Line: 1 Col: 61 Unexpected end tag (td). Ignored.
+Line: 1 Col: 69 Unexpected end tag (tfoot). Ignored.
+Line: 1 Col: 74 Unexpected end tag (th). Ignored.
+Line: 1 Col: 82 Unexpected end tag (thead). Ignored.
+Line: 1 Col: 87 Unexpected end tag (tr). Ignored.
+Line: 1 Col: 87 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+
+#data
+</table><tr>
+#errors
+Line: 1 Col: 8 Unexpected end tag (table). Ignored.
+Line: 1 Col: 12 Unexpected end of file. Expected table content.
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+<body></body></html>
+#errors
+Line: 1 Col: 20 Unexpected html end tag in inner html mode.
+Line: 1 Col: 20 Unexpected EOF in inner html mode.
+#document-fragment
+html
+#document
+| <head>
+| <body>
+
+#data
+<html><frameset></frameset></html> 
+#errors
+Line: 1 Col: 6 Unexpected start tag (html). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <frameset>
+|   " "
+
+#data
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html></html>
+#errors
+Line: 1 Col: 50 Erroneous DOCTYPE.
+Line: 1 Col: 63 Unexpected end tag (html) after the (implied) root element.
+#document
+| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "">
+| <html>
+|   <head>
+|   <body>
+
+#data
+<param><frameset></frameset>
+#errors
+Line: 1 Col: 7 Unexpected start tag (param). Expected DOCTYPE.
+Line: 1 Col: 17 Unexpected start tag (frameset).
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<source><frameset></frameset>
+#errors
+Line: 1 Col: 7 Unexpected start tag (source). Expected DOCTYPE.
+Line: 1 Col: 17 Unexpected start tag (frameset).
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<track><frameset></frameset>
+#errors
+Line: 1 Col: 7 Unexpected start tag (track). Expected DOCTYPE.
+Line: 1 Col: 17 Unexpected start tag (frameset).
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</html><frameset></frameset>
+#errors
+7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+17: Stray “frameset” start tag.
+17: “frameset” start tag seen.
+#document
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+</body><frameset></frameset>
+#errors
+7: End tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+17: Stray “frameset” start tag.
+17: “frameset” start tag seen.
+#document
+| <html>
+|   <head>
+|   <frameset>
diff --git a/html5lib/tests/testdata/tree-construction/tests7.dat b/html5lib/tests/testdata/tree-construction/tests7.dat
new file mode 100644
index 00000000..f5193c66
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests7.dat
@@ -0,0 +1,390 @@
+#data
+<!doctype html><body><title>X</title>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+
+#data
+<!doctype html><table><title>X</title></table>
+#errors
+Line: 1 Col: 29 Unexpected start tag (title) in table context caused voodoo mode.
+Line: 1 Col: 38 Unexpected end tag (title) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <title>
+|       "X"
+|     <table>
+
+#data
+<!doctype html><head></head><title>X</title>
+#errors
+Line: 1 Col: 35 Unexpected start tag (title) that can be in head. Moved.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "X"
+|   <body>
+
+#data
+<!doctype html></head><title>X</title>
+#errors
+Line: 1 Col: 29 Unexpected start tag (title) that can be in head. Moved.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <title>
+|       "X"
+|   <body>
+
+#data
+<!doctype html><table><meta></table>
+#errors
+Line: 1 Col: 28 Unexpected start tag (meta) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <meta>
+|     <table>
+
+#data
+<!doctype html><table>X<tr><td><table> <meta></table></table>
+#errors
+Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 45 Unexpected start tag (meta) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <meta>
+|             <table>
+|               " "
+
+#data
+<!doctype html><html> <head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html> <head>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!doctype html><table><style> <tr>x </style> </table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <style>
+|         " <tr>x "
+|       " "
+
+#data
+<!doctype html><table><TBODY><script> <tr>x </script> </table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <script>
+|           " <tr>x "
+|         " "
+
+#data
+<!doctype html><applet>X</applet>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     
+|       <applet>
+|         
+|           "X"
+
+#data
+<!doctype html><listing>
+X</listing>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <listing>
+|       "X"
+
+#data
+<!doctype html><select><input>X
+#errors
+Line: 1 Col: 30 Unexpected input start tag in the select phase.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <input>
+|     "X"
+
+#data
+<!doctype html><select><select>X
+#errors
+Line: 1 Col: 31 Unexpected select start tag in the select phase treated as select end tag.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     "X"
+
+#data
+<!doctype html><table><input type=hidDEN></table>
+#errors
+Line: 1 Col: 41 Unexpected input with type hidden in table context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>X<input type=hidDEN></table>
+#errors
+Line: 1 Col: 23 Unexpected non-space characters in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     "X"
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>  <input type=hidDEN></table>
+#errors
+Line: 1 Col: 43 Unexpected input with type hidden in table context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table>  <input type='hidDEN'></table>
+#errors
+Line: 1 Col: 45 Unexpected input with type hidden in table context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       "  "
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table><input type=" hidden"><input type=hidDEN></table>
+#errors
+Line: 1 Col: 44 Unexpected start tag (input) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|       type=" hidden"
+|     <table>
+|       <input>
+|         type="hidDEN"
+
+#data
+<!doctype html><table><select>X<tr>
+#errors
+Line: 1 Col: 30 Unexpected start tag (select) in table context caused voodoo mode.
+Line: 1 Col: 35 Unexpected table element start tag (trs) in the select in table phase.
+Line: 1 Col: 35 Unexpected end of file. Expected table content.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!doctype html><select>X</select>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+
+#data
+<!DOCTYPE hTmL><html></html>
+#errors
+Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<!DOCTYPE HTML><html></html>
+#errors
+Line: 1 Col: 28 Unexpected end tag (html) after the (implied) root element.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+
+#data
+<body>X</body></body>
+#errors
+Line: 1 Col: 21 Unexpected end tag token (body) in the after body phase.
+Line: 1 Col: 21 Unexpected EOF in inner html mode.
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   "X"
+
+#data
+<div>a</x> b
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 13 Unexpected end tag (x). Ignored.
+Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       
+|         "a b"
+
+#data
+<table><tr><td><code></code> </table>
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <code>
+|             " "
+
+#data
+<table><b><tr><td>aaa</td></tr>bbb</table>ccc
+#errors
+XXX: Fix me
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     <b>
+|       "bbb"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "aaa"
+|     <b>
+|       "ccc"
+
+#data
+A<table><tr> B</tr> B</table>
+#errors
+XXX: Fix me
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A B B"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+A<table><tr> B</tr> </em>C</table>
+#errors
+XXX: Fix me
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A BC"
+|     <table>
+|       <tbody>
+|         <tr>
+|         " "
+
+#data
+<select><keygen>
+#errors
+Not known
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|     <keygen>
diff --git a/html5lib/tests/testdata/tree-construction/tests8.dat b/html5lib/tests/testdata/tree-construction/tests8.dat
new file mode 100644
index 00000000..90e6c919
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests8.dat
@@ -0,0 +1,148 @@
+#data
+<div>
+<div></div>
+</span>x
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 3 Col: 7 Unexpected end tag (span). Ignored.
+Line: 3 Col: 8 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "
+"
+|       <div>
+|       "
+x"
+
+#data
+<div>x<div></div>
+</span>x
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 2 Col: 7 Unexpected end tag (span). Ignored.
+Line: 2 Col: 8 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "
+x"
+
+#data
+<div>x<div></div>x</span>x
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 25 Unexpected end tag (span). Ignored.
+Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "xx"
+
+#data
+<div>x<div></div>y</span>z
+#errors
+Line: 1 Col: 5 Unexpected start tag (div). Expected DOCTYPE.
+Line: 1 Col: 25 Unexpected end tag (span). Ignored.
+Line: 1 Col: 26 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "yz"
+
+#data
+<table><div>x<div></div>x</span>x
+#errors
+Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
+Line: 1 Col: 12 Unexpected start tag (div) in table context caused voodoo mode.
+Line: 1 Col: 18 Unexpected start tag (div) in table context caused voodoo mode.
+Line: 1 Col: 24 Unexpected end tag (div) in table context caused voodoo mode.
+Line: 1 Col: 32 Unexpected end tag (span) in table context caused voodoo mode.
+Line: 1 Col: 32 Unexpected end tag (span). Ignored.
+Line: 1 Col: 33 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "x"
+|       <div>
+|       "xx"
+|     <table>
+
+#data
+x<table>x
+#errors
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+Line: 1 Col: 9 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 9 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "xx"
+|     <table>
+
+#data
+x<table><table>x
+#errors
+Line: 1 Col: 1 Unexpected non-space characters. Expected DOCTYPE.
+Line: 1 Col: 15 Unexpected start tag (table) implies end tag (table).
+Line: 1 Col: 16 Unexpected non-space characters in table context caused voodoo mode.
+Line: 1 Col: 16 Unexpected end of file. Expected table content.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <table>
+|     "x"
+|     <table>
+
+#data
+<b>a<div></div><div></b>y
+#errors
+Line: 1 Col: 3 Unexpected start tag (b). Expected DOCTYPE.
+Line: 1 Col: 24 End tag (b) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 25 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|       "a"
+|       <div>
+|     <div>
+|       <b>
+|       "y"
+
+#data
+<a><div></a>
+#errors
+Line: 1 Col: 3 Unexpected start tag (a). Expected DOCTYPE.
+Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 15 End tag (a) violates step 1, paragraph 3 of the adoption agency algorithm.
+Line: 1 Col: 15 Expected closing tag. Unexpected end of file.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <div>
+|       <a>
+|       
+|         <a>
diff --git a/html5lib/tests/testdata/tree-construction/tests9.dat b/html5lib/tests/testdata/tree-construction/tests9.dat
new file mode 100644
index 00000000..554e27ae
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests9.dat
@@ -0,0 +1,457 @@
+#data
+<!DOCTYPE html><math></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!DOCTYPE html><body><math></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+
+#data
+<!DOCTYPE html><math><mi>
+#errors
+25: End of file in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+
+#data
+<!DOCTYPE html><math><annotation-xml><svg><u>
+#errors
+45: HTML start tag “u” in a foreign namespace context.
+45: End of file seen and there were open elements.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math annotation-xml>
+|         <svg svg>
+|     <u>
+
+#data
+<!DOCTYPE html><body><select><math></math></select>
+#errors
+Line: 1 Col: 35 Unexpected start tag token (math) in the select phase. Ignored.
+Line: 1 Col: 42 Unexpected end tag (math) in the select phase. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+
+#data
+<!DOCTYPE html><body><select><option><math></math></option></select>
+#errors
+Line: 1 Col: 43 Unexpected start tag token (math) in the select phase. Ignored.
+Line: 1 Col: 50 Unexpected end tag (math) in the select phase. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+
+#data
+<!DOCTYPE html><body><table><math></math></table>
+#errors
+Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 41 Unexpected end tag (math) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><math><mi>foo</mi></math></table>
+#errors
+Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 53 Unexpected end tag (math) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><math><mi>foo</mi><mi>bar</mi></math></table>
+#errors
+Line: 1 Col: 34 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 46 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 58 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 65 Unexpected end tag (math) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+
+#data
+<!DOCTYPE html><body><table><tbody><math><mi>foo</mi><mi>bar</mi></math></tbody></table>
+#errors
+Line: 1 Col: 41 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 53 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 65 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 72 Unexpected end tag (math) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+|       <tbody>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><math><mi>foo</mi><mi>bar</mi></math></tr></tbody></table>
+#errors
+Line: 1 Col: 45 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 57 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 69 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 76 Unexpected end tag (math) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math></td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mi>
+|                 "foo"
+|               <math mi>
+|                 "bar"
+
+#data
+<!DOCTYPE html><body><table><tbody><tr><td><math><mi>foo</mi><mi>bar</mi></math><p>baz</td></tr></tbody></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <math math>
+|               <math mi>
+|                 "foo"
+|               <math mi>
+|                 "bar"
+|             
+|               "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi></math><p>baz</caption></table>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|         
+|           "baz"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table>quux
+#errors
+Line: 1 Col: 70 HTML start tag "p" in a foreign namespace context.
+Line: 1 Col: 81 Unexpected end table tag in caption. Generates implied end caption.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|         
+|           "baz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><caption><math><mi>foo</mi><mi>bar</mi>baz</table>quux
+#errors
+Line: 1 Col: 78 Unexpected end table tag in caption. Generates implied end caption.
+Line: 1 Col: 78 Unexpected end tag (caption). Missing end tag (math).
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <caption>
+|         <math math>
+|           <math mi>
+|             "foo"
+|           <math mi>
+|             "bar"
+|           "baz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><colgroup><math><mi>foo</mi><mi>bar</mi>baz</table>quux
+#errors
+Line: 1 Col: 44 Unexpected start tag (math) in table context caused voodoo mode.
+Line: 1 Col: 56 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 68 Unexpected end tag (mi) in table context caused voodoo mode.
+Line: 1 Col: 71 HTML start tag "p" in a foreign namespace context.
+Line: 1 Col: 71 Unexpected start tag (p) in table context caused voodoo mode.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     
+|       "baz"
+|     <table>
+|       <colgroup>
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><tr><td><select><math><mi>foo</mi><mi>bar</mi>baz</table>quux
+#errors
+Line: 1 Col: 50 Unexpected start tag token (math) in the select phase. Ignored.
+Line: 1 Col: 54 Unexpected start tag token (mi) in the select phase. Ignored.
+Line: 1 Col: 62 Unexpected end tag (mi) in the select phase. Ignored.
+Line: 1 Col: 66 Unexpected start tag token (mi) in the select phase. Ignored.
+Line: 1 Col: 74 Unexpected end tag (mi) in the select phase. Ignored.
+Line: 1 Col: 77 Unexpected start tag token (p) in the select phase. Ignored.
+Line: 1 Col: 88 Unexpected table element end tag (tables) in the select in table phase.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             <select>
+|               "foobarbaz"
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body><table><select><math><mi>foo</mi><mi>bar</mi>baz</table>quux
+#errors
+Line: 1 Col: 36 Unexpected start tag (select) in table context caused voodoo mode.
+Line: 1 Col: 42 Unexpected start tag token (math) in the select phase. Ignored.
+Line: 1 Col: 46 Unexpected start tag token (mi) in the select phase. Ignored.
+Line: 1 Col: 54 Unexpected end tag (mi) in the select phase. Ignored.
+Line: 1 Col: 58 Unexpected start tag token (mi) in the select phase. Ignored.
+Line: 1 Col: 66 Unexpected end tag (mi) in the select phase. Ignored.
+Line: 1 Col: 69 Unexpected start tag token (p) in the select phase. Ignored.
+Line: 1 Col: 80 Unexpected table element end tag (tables) in the select in table phase.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "foobarbaz"
+|     <table>
+|     
+|       "quux"
+
+#data
+<!DOCTYPE html><body></body></html><math><mi>foo</mi><mi>bar</mi>baz
+#errors
+Line: 1 Col: 41 Unexpected start tag (math).
+Line: 1 Col: 68 HTML start tag "p" in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     
+|       "baz"
+
+#data
+<!DOCTYPE html><body></body><math><mi>foo</mi><mi>bar</mi>baz
+#errors
+Line: 1 Col: 34 Unexpected start tag token (math) in the after body phase.
+Line: 1 Col: 61 HTML start tag "p" in a foreign namespace context.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mi>
+|         "foo"
+|       <math mi>
+|         "bar"
+|     
+|       "baz"
+
+#data
+<!DOCTYPE html><frameset><math><mi></mi><mi></mi><span>
+#errors
+Line: 1 Col: 31 Unexpected start tag token (math) in the frameset phase. Ignored.
+Line: 1 Col: 35 Unexpected start tag token (mi) in the frameset phase. Ignored.
+Line: 1 Col: 40 Unexpected end tag token (mi) in the frameset phase. Ignored.
+Line: 1 Col: 44 Unexpected start tag token (mi) in the frameset phase. Ignored.
+Line: 1 Col: 49 Unexpected end tag token (mi) in the frameset phase. Ignored.
+Line: 1 Col: 52 Unexpected start tag token (p) in the frameset phase. Ignored.
+Line: 1 Col: 58 Unexpected start tag token (span) in the frameset phase. Ignored.
+Line: 1 Col: 58 Expected closing tag. Unexpected end of file.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><frameset></frameset><math><mi></mi><mi></mi><span>
+#errors
+Line: 1 Col: 42 Unexpected start tag (math) in the after frameset phase. Ignored.
+Line: 1 Col: 46 Unexpected start tag (mi) in the after frameset phase. Ignored.
+Line: 1 Col: 51 Unexpected end tag (mi) in the after frameset phase. Ignored.
+Line: 1 Col: 55 Unexpected start tag (mi) in the after frameset phase. Ignored.
+Line: 1 Col: 60 Unexpected end tag (mi) in the after frameset phase. Ignored.
+Line: 1 Col: 63 Unexpected start tag (p) in the after frameset phase. Ignored.
+Line: 1 Col: 69 Unexpected start tag (span) in the after frameset phase. Ignored.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!DOCTYPE html><body xlink:href=foo><math xlink:href=foo></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     <math math>
+|       xlink href="foo"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo></mi></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo /></math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+
+#data
+<!DOCTYPE html><body xlink:href=foo xml:lang=en><math><mi xml:lang=en xlink:href=foo />bar</math>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     xlink:href="foo"
+|     xml:lang="en"
+|     <math math>
+|       <math mi>
+|         xlink href="foo"
+|         xml lang="en"
+|       "bar"
diff --git a/html5lib/tests/testdata/tree-construction/tests_innerHTML_1.dat b/html5lib/tests/testdata/tree-construction/tests_innerHTML_1.dat
new file mode 100644
index 00000000..6c78661e
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tests_innerHTML_1.dat
@@ -0,0 +1,741 @@
+#data
+<body><span>
+#errors
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><body>
+#errors
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><body>
+#errors
+#document-fragment
+div
+#document
+| <span>
+
+#data
+<body><span>
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <body>
+|   <span>
+
+#data
+<frameset><span>
+#errors
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><frameset>
+#errors
+#document-fragment
+body
+#document
+| <span>
+
+#data
+<span><frameset>
+#errors
+#document-fragment
+div
+#document
+| <span>
+
+#data
+<frameset><span>
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <frameset>
+
+#data
+<table><tr>
+#errors
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+</table><tr>
+#errors
+#document-fragment
+table
+#document
+| <tbody>
+|   <tr>
+
+#data
+<a>
+#errors
+#document-fragment
+table
+#document
+| <a>
+
+#data
+<a>
+#errors
+#document-fragment
+table
+#document
+| <a>
+
+#data
+<a><caption>a
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <caption>
+|   "a"
+
+#data
+<a><colgroup><col>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <colgroup>
+|   <col>
+
+#data
+<a><tbody><tr>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+
+#data
+<a><tfoot><tr>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <tfoot>
+|   <tr>
+
+#data
+<a><thead><tr>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <thead>
+|   <tr>
+
+#data
+<a><tr>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+
+#data
+<a><th>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+|     <th>
+
+#data
+<a><td>
+#errors
+#document-fragment
+table
+#document
+| <a>
+| <tbody>
+|   <tr>
+|     <td>
+
+#data
+<table></table><tbody>
+#errors
+#document-fragment
+caption
+#document
+| <table>
+
+#data
+</table><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+<span></table>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+</caption><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+
+#data
+<span></caption><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><caption><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><col><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><colgroup><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><html><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tbody><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><td><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tfoot><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><thead><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><th><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span><tr><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+<span></table><span>
+#errors
+#document-fragment
+caption
+#document
+| <span>
+|   <span>
+
+#data
+</colgroup><col>
+#errors
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<a><col>
+#errors
+#document-fragment
+colgroup
+#document
+| <col>
+
+#data
+<caption><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<col><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<colgroup><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<tbody><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<tfoot><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<thead><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+</table><a>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+
+#data
+<a><tr>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+
+#data
+<a><td>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<a><td>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<a><td>
+#errors
+#document-fragment
+tbody
+#document
+| <a>
+| <tr>
+|   <td>
+
+#data
+<td><table><tbody><a><tr>
+#errors
+#document-fragment
+tbody
+#document
+| <tr>
+|   <td>
+|     <a>
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+</tr><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<td><table><a><tr></tr><tr>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+|   <a>
+|   <table>
+|     <tbody>
+|       <tr>
+|       <tr>
+
+#data
+<caption><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<col><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<colgroup><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tbody><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tfoot><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<thead><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<tr><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+</table><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+
+#data
+<td><table></table><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+|   <table>
+| <td>
+
+#data
+<td><table></table><td>
+#errors
+#document-fragment
+tr
+#document
+| <td>
+|   <table>
+| <td>
+
+#data
+<caption><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<col><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<colgroup><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tbody><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tfoot><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<th><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<thead><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<tr><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</table><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tbody><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</td><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tfoot><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</thead><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</th><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+</tr><a>
+#errors
+#document-fragment
+td
+#document
+| <a>
+
+#data
+<table><td><td>
+#errors
+#document-fragment
+td
+#document
+| <table>
+|   <tbody>
+|     <tr>
+|       <td>
+|       <td>
+
+#data
+</select><option>
+#errors
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<input><option>
+#errors
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<keygen><option>
+#errors
+#document-fragment
+select
+#document
+| <option>
+
+#data
+<textarea><option>
+#errors
+#document-fragment
+select
+#document
+| <option>
+
+#data
+</html><!--abc-->
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <body>
+| <!-- abc -->
+
+#data
+</frameset><frame>
+#errors
+#document-fragment
+frameset
+#document
+| <frame>
+
+#data
+#errors
+#document-fragment
+html
+#document
+| <head>
+| <body>
diff --git a/html5lib/tests/testdata/tree-construction/tricky01.dat b/html5lib/tests/testdata/tree-construction/tricky01.dat
new file mode 100644
index 00000000..08419924
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/tricky01.dat
@@ -0,0 +1,261 @@
+#data
+<b>Bold </b> Not bold
+Also not bold.
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <b>
+|     
+|       <b>
+|         "Bold "
+|       " Not bold"
+|     "
+Also not bold."
+
+#data
+<html>
+<font color=red><i>Italic and RedItalic and Red </font> Just italic. Italic only.</i> Plain
+I should not be red. <font color=red>Red. <i>Italic and red.
+Italic and red. </i> Red.</font> I should not be red.
+<b>Bold <i>Bold and italic</b> Only Italic </i> Plain
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       color="red"
+|       <i>
+|         "Italic and Red"
+|     <i>
+|       
+|         <font>
+|           color="red"
+|           "Italic and Red "
+|         " Just italic."
+|       " Italic only."
+|     " Plain
+"
+|     
+|       "I should not be red. "
+|       <font>
+|         color="red"
+|         "Red. "
+|         <i>
+|           "Italic and red."
+|     <font>
+|       color="red"
+|       <i>
+|         "
+"
+|     
+|       <font>
+|         color="red"
+|         <i>
+|           "Italic and red. "
+|         " Red."
+|       " I should not be red."
+|     "
+"
+|     <b>
+|       "Bold "
+|       <i>
+|         "Bold and italic"
+|     <i>
+|       " Only Italic "
+|     " Plain"
+
+#data
+<html><body>
+<font size="7">First paragraph.
+Second paragraph.</font>
+<b><i>Bold and Italic</b> Italic
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     
+|       <font>
+|         size="7"
+|         "First paragraph."
+|     <font>
+|       size="7"
+|       "
+"
+|       
+|         "Second paragraph."
+|     "
+"
+|     <b>
+|     
+|       <b>
+|         <i>
+|           "Bold and Italic"
+|       <i>
+|         " Italic"
+
+#data
+<html>
+<dl>
+<dt><b>Boo
+<dd>Goo?
+</dl>
+</html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dl>
+|       "
+"
+|       <dt>
+|         <b>
+|           "Boo
+"
+|       <dd>
+|         <b>
+|           "Goo?
+"
+|     <b>
+|       "
+"
+
+#data
+<html><body>
+<label><a><div>Hello<div>World</div></a></label>  
+</body></html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     <label>
+|       <a>
+|       <div>
+|         <a>
+|           "Hello"
+|           <div>
+|             "World"
+|         "  
+"
+
+#data
+<table><center> <font>a</center> <img> <tr><td> </td> </tr> </table>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <center>
+|       " "
+|       <font>
+|         "a"
+|     <font>
+|       <img>
+|       " "
+|     <table>
+|       " "
+|       <tbody>
+|         <tr>
+|           <td>
+|             " "
+|           " "
+|         " "
+
+#data
+<table><tr><a>You should see this text.
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       <a>
+|     
+|       <a>
+|         "You should see this text."
+|     <table>
+|       <tbody>
+|         <tr>
+
+#data
+<TABLE>
+<TR>
+<CENTER><CENTER><TD></TD></TR><TR>
+<FONT>
+<TABLE><tr></tr></TABLE>
+
+<a></font><font></a>
+This page contains an insanely badly-nested tag sequence.
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <center>
+|       <center>
+|     <font>
+|       "
+"
+|     <table>
+|       "
+"
+|       <tbody>
+|         <tr>
+|           "
+"
+|           <td>
+|         <tr>
+|           "
+"
+|     <table>
+|       <tbody>
+|         <tr>
+|     <font>
+|       "
+"
+|       
+|       "
+"
+|       <a>
+|     <a>
+|       <font>
+|     <font>
+|       "
+This page contains an insanely badly-nested tag sequence."
+
+#data
+<html>
+<body>
+<b><nobr><div>This text is in a div inside a nobr</nobr>More text that should not be in the nobr, i.e., the
+nobr should have closed the div inside it implicitly. </b><pre>A pre tag outside everything else.</pre>
+</body>
+</html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "
+"
+|     <b>
+|       <nobr>
+|     <div>
+|       <b>
+|         <nobr>
+|           "This text is in a div inside a nobr"
+|         "More text that should not be in the nobr, i.e., the
+nobr should have closed the div inside it implicitly. "
+|       <pre>
+|         "A pre tag outside everything else."
+|       "
+
+"
diff --git a/html5lib/tests/testdata/tree-construction/webkit01.dat b/html5lib/tests/testdata/tree-construction/webkit01.dat
new file mode 100644
index 00000000..06bc436b
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/webkit01.dat
@@ -0,0 +1,594 @@
+#data
+Test
+#errors
+Line: 1 Col: 4 Unexpected non-space characters. Expected DOCTYPE.
+#document
+| <html>
+|   <head>
+|   <body>
+|     "Test"
+
+#data
+<div></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+
+#data
+<div>Test</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Test"
+
+#data
+<di
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<div>Hello</div>
+<script>
+console.log("PASS");
+</script>
+<div>Bye</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Hello"
+|     "
+"
+|     <script>
+|       "
+console.log("PASS");
+"
+|     "
+"
+|     <div>
+|       "Bye"
+
+#data
+<div foo="bar">Hello</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo="bar"
+|       "Hello"
+
+#data
+<div>Hello</div>
+<script>
+console.log("FOO<span>BAR</span>BAZ");
+</script>
+<div>Bye</div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       "Hello"
+|     "
+"
+|     <script>
+|       "
+console.log("FOO<span>BAR</span>BAZ");
+"
+|     "
+"
+|     <div>
+|       "Bye"
+
+#data
+<foo bar="baz"></foo><potato quack="duck"></potato>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="baz"
+|     <potato>
+|       quack="duck"
+
+#data
+<foo bar="baz"><potato quack="duck"></potato></foo>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="baz"
+|       <potato>
+|         quack="duck"
+
+#data
+<foo></foo bar="baz"><potato></potato quack="duck">
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|     <potato>
+
+#data
+</ tttt>
+#errors
+#document
+| <!--  tttt -->
+| <html>
+|   <head>
+|   <body>
+
+#data
+<div FOO ><img><img></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       foo=""
+|       <img>
+|       <img>
+
+#data
+TestTest2
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     
+|       "TestTest2"
+
+#data
+<rdar://problem/6869687>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <rdar:>
+|       6869687=""
+|       problem=""
+
+#data
+<A>test< /A>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|       "test< /A>"
+
+#data
+&lt;
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "<"
+
+#data
+<body foo='bar'><body foo='baz' yo='mama'>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     foo="bar"
+|     yo="mama"
+
+#data
+<body></br foo="bar"></body>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <br>
+
+#data
+<bdy></body>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <bdy>
+|       
+|         foo="bar"
+
+#data
+<body></body></br foo="bar">
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <br>
+
+#data
+<bdy></body>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <bdy>
+|       
+|         foo="bar"
+
+#data
+<html><body></body></html><!-- Hi there -->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+| <!--  Hi there  -->
+
+#data
+<html><body></body></html>x<!-- Hi there -->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+
+#data
+<html><body></body></html>x<!-- Hi there --></html><!-- Again -->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+| <!--  Again  -->
+
+#data
+<html><body></body></html>x<!-- Hi there --></body></html><!-- Again -->
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "x"
+|     <!--  Hi there  -->
+| <!--  Again  -->
+
+#data
+<html><body><ruby><div><rp>xx</rp></div></ruby></body></html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <rp>
+|           "xx"
+
+#data
+<html><body><ruby><div><rt>xx</rt></div></ruby></body></html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ruby>
+|       <div>
+|         <rt>
+|           "xx"
+
+#data
+<html><frameset><!--1--><noframes>A</noframes><!--2--></frameset><!--3--><noframes>B</noframes><!--4--></html><!--5--><noframes>C</noframes><!--6-->
+#errors
+#document
+| <html>
+|   <head>
+|   <frameset>
+|     <!-- 1 -->
+|     <noframes>
+|       "A"
+|     <!-- 2 -->
+|   <!-- 3 -->
+|   <noframes>
+|     "B"
+|   <!-- 4 -->
+|   <noframes>
+|     "C"
+| <!-- 5 -->
+| <!-- 6 -->
+
+#data
+<select><option>A<select><option>B<select><option>C<select><option>D<select><option>E<select><option>F<select><option>G<select>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       <option>
+|         "A"
+|     <option>
+|       "B"
+|       <select>
+|         <option>
+|           "C"
+|     <option>
+|       "D"
+|       <select>
+|         <option>
+|           "E"
+|     <option>
+|       "F"
+|       <select>
+|         <option>
+|           "G"
+
+#data
+<dd><dd><dt><dt><dd><li><li>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <dd>
+|     <dd>
+|     <dt>
+|     <dt>
+|     <dd>
+|       <li>
+|       <li>
+
+#data
+<div><b></div><div><nobr>a<nobr>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <b>
+|     <div>
+|       <b>
+|         <nobr>
+|           "a"
+|         <nobr>
+
+#data
+<head></head>
+<body></body>
+#errors
+#document
+| <html>
+|   <head>
+|   "
+"
+|   <body>
+
+#data
+<head></head> <style></style>ddd
+#errors
+#document
+| <html>
+|   <head>
+|     <style>
+|   " "
+|   <body>
+|     "ddd"
+
+#data
+<kbd><table></kbd><col><select><tr>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <kbd>
+|       <select>
+|       <table>
+|         <colgroup>
+|           <col>
+|         <tbody>
+|           <tr>
+
+#data
+<kbd><table></kbd><col><select><tr></table><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <kbd>
+|       <select>
+|       <table>
+|         <colgroup>
+|           <col>
+|         <tbody>
+|           <tr>
+|       <div>
+
+#data
+<a><li><style></style><title></title></a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <li>
+|       <a>
+|         <style>
+|         <title>
+
+#data
+<font><meta><title></title></font>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <font>
+|       
+|     
+|       <font>
+|         <meta>
+|         <title>
+
+#data
+<a><center><title></title><a>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <a>
+|     <center>
+|       <a>
+|         <title>
+|       <a>
+
+#data
+<svg><title><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <div>
+
+#data
+<svg><title><rect><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <rect>
+|           <div>
+
+#data
+<svg><title><svg><div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <svg svg>
+|         <div>
+
+#data
+<img <="" FAIL>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <img>
+|       <=""
+|       fail=""
+
+#data
+<ul><li><div id='foo'/>A</li><li>B<div>C</div></li></ul>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <ul>
+|       <li>
+|         <div>
+|           id="foo"
+|           "A"
+|       <li>
+|         "B"
+|         <div>
+|           "C"
+
+#data
+<svg><em><desc></em>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|     <em>
+|       <desc>
+
+#data
+<svg><tfoot></mi><td>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg tfoot>
+|         <svg td>
+
+#data
+<math><mrow><mrow><mn>1</mn></mrow><mi>a</mi></mrow></math>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <math math>
+|       <math mrow>
+|         <math mrow>
+|           <math mn>
+|             "1"
+|         <math mi>
+|           "a"
+
+#data
+<!doctype html><input type="hidden"><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><input type="button"><frameset>
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <input>
+|       type="button"
diff --git a/html5lib/tests/testdata/tree-construction/webkit02.dat b/html5lib/tests/testdata/tree-construction/webkit02.dat
new file mode 100644
index 00000000..468879b3
--- /dev/null
+++ b/html5lib/tests/testdata/tree-construction/webkit02.dat
@@ -0,0 +1,94 @@
+#data
+<foo bar=qux/>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <foo>
+|       bar="qux/"
+
+#data
+<p id="status"><noscript><strong>A</strong></noscript><span>B</span></p>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <p>
+|       id="status"
+|       <noscript>
+|         "<strong>A</strong>"
+|       <span>
+|         "B"
+
+#data
+<div><sarcasm><div></div></sarcasm></div>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <div>
+|       <sarcasm>
+|         <div>
+
+#data
+<html><body><img src="" border="0" alt="><div>A</div></body></html>
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+
+#data
+<table><td></tbody>A
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     "A"
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+
+#data
+<table><td></thead>A
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<table><td></tfoot>A
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <tbody>
+|         <tr>
+|           <td>
+|             "A"
+
+#data
+<table><thead><td></tbody>A
+#errors
+#document
+| <html>
+|   <head>
+|   <body>
+|     <table>
+|       <thead>
+|         <tr>
+|           <td>
+|             "A"
diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py
new file mode 100644
index 00000000..d0fef746
--- /dev/null
+++ b/html5lib/tests/tokenizertotree.py
@@ -0,0 +1,64 @@
+import sys
+import os
+import json
+import re
+
+import html5lib
+import support
+import test_parser
+import test_tokenizer
+
+p = html5lib.HTMLParser()  # one parser instance, reused by every make_test() call
+
+unnamespaceExpected = re.compile(r"^(\|\s*)<html ([^>]+)>", re.M).sub  # bound .sub; make_test uses it to turn "| <html NAME>" lines into "| <NAME>"
+
+def main(out_path):  # out_path: location the generated .dat files are written under (only its existence is checked)
+    if not os.path.exists(out_path):
+        sys.stderr.write("Path %s does not exist\n"%out_path)  # newline-terminated, like run_file's error message
+        sys.exit(1)
+    # Convert every tokenizer '*.test' file reported by the support module.
+    for filename in support.html5lib_test_files('tokenizer', '*.test'):
+        run_file(filename, out_path)
+
+def run_file(filename, out_path):  # convert one tokenizer JSON test file into <out_path>/tokenizer_<name>.dat
+    try:
+        tests_data = json.load(file(filename))  # NOTE(review): Python 2 file(); the handle is never explicitly closed
+    except ValueError:
+        sys.stderr.write("Failed to load %s\n"%filename)
+        return  # a file that is not valid JSON is reported and skipped
+    name = os.path.splitext(os.path.split(filename)[1])[0]  # base name without directory or extension
+    output_file = open(os.path.join(out_path, "tokenizer_%s.dat"%name), "w")  # NOTE(review): not closed if make_test raises
+
+    if 'tests' in tests_data:
+        for test_data in tests_data['tests']:
+            if 'initialStates' not in test_data:
+                test_data["initialStates"] = ["Data state"]  # default when the test names no initial state
+                
+            for initial_state in test_data["initialStates"]:
+                if initial_state != "Data state":
+                    # Only tests starting in the data state are converted; others are not supported yet.
+                    continue
+                test = make_test(test_data)
+                output_file.write(test)
+
+    output_file.close()
+
+def make_test(test_data):  # build one test entry: "#data", the input, an empty "#errors" section, then the serialized tree
+    if 'doubleEscaped' in test_data:
+        test_data = test_tokenizer.unescape_test(test_data)
+    # Returns a single newline-joined string; the trailing "" item makes it end with a newline.
+    rv = []
+    rv.append("#data")
+    rv.append(test_data["input"].encode("utf8"))
+    rv.append("#errors")  # no error lines are written after this header
+    tree = p.parse(test_data["input"])  # uses the module-level parser p
+    output = p.tree.testSerializer(tree)
+    output  = "\n".join(("| "+ line[3:]) if line.startswith("|  ") else line
+                        for line in output.split("\n"))  # lines starting with "|  " lose one space of indent
+    output = unnamespaceExpected(r"\1<\2>", output)  # "| <html NAME>" -> "| <NAME>"
+    rv.append(output.encode("utf8"))
+    rv.append("")
+    return "\n".join(rv)
+
+if __name__ == "__main__":
+    main(sys.argv[1])
diff --git a/html5lib/tokenizer.py b/html5lib/tokenizer.py
new file mode 100644
index 00000000..7e9eca88
--- /dev/null
+++ b/html5lib/tokenizer.py
@@ -0,0 +1,1744 @@
+try:
+    frozenset
+except NameError:
+    # Import from the sets module for python 2.3
+    from sets import Set as set
+    from sets import ImmutableSet as frozenset
+try:
+    from collections import deque
+except ImportError:
+    from utils import deque
+    
+from constants import spaceCharacters
+from constants import entitiesWindows1252, entities
+from constants import asciiLowercase, asciiLetters, asciiUpper2Lower
+from constants import digits, hexDigits, EOF
+from constants import tokenTypes, tagTokenTypes
+from constants import replacementCharacters
+
+from inputstream import HTMLInputStream
+
+# Index entity names by their first character, so consumeEntity only scans names sharing that character
+entitiesByFirstChar = {}
+for e in entities:
+    entitiesByFirstChar.setdefault(e[0], []).append(e)
+
+class HTMLTokenizer(object):
+    """ This class takes care of tokenizing HTML.
+
+    * self.currentToken
+      Holds the token that is currently being processed.
+
+    * self.state
+      Holds a reference to the method to be invoked... XXX
+
+    * self.stream
+      Points to HTMLInputStream object.
+    """
+
+    def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
+                 lowercaseElementName=True, lowercaseAttrName=True, parser=None):
+        # stream, encoding, parseMeta and useChardet are passed straight to HTMLInputStream.
+        self.stream = HTMLInputStream(stream, encoding, parseMeta, useChardet)
+        self.parser = parser
+
+        # Case-conversion flags; lowercaseElementName is applied in emitCurrentToken.
+        self.lowercaseElementName = lowercaseElementName
+        self.lowercaseAttrName = lowercaseAttrName
+        
+        # Setup the initial tokenizer state
+        self.escapeFlag = False
+        self.lastFourChars = []
+        self.state = self.dataState  # tokenization starts in the data state
+        self.escape = False
+
+        # The current token being created
+        self.currentToken = None
+        super(HTMLTokenizer, self).__init__()
+
+    def __iter__(self):
+        """ Yield the token stream, one token dict at a time.
+
+        We do our usual processing through the states and when we have a token
+        to return we yield the token which pauses processing until the next token
+        is requested.
+        """
+        self.tokenQueue = deque([])  # (re)created on every iteration start
+        # Start processing. When EOF is reached self.state will return False
+        # instead of True and the loop will terminate.
+        while self.state():  # each call runs the current state method once
+            while self.stream.errors:  # input-stream errors are yielded first, as ParseError tokens
+                yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)}
+            while self.tokenQueue:  # then the tokens queued by the state method, in order
+                yield self.tokenQueue.popleft()
+
+    def consumeNumberEntity(self, isHex):
+        """Consume the digits of a numeric character reference (base 16 if isHex,
+        else base 10) and return the resulting character, or its replacement. A
+        trailing ";" is discarded; if absent a ParseError is queued and the char ungot.
+        """
+
+        allowed = digits
+        radix = 10
+        if isHex:
+            allowed = hexDigits
+            radix = 16
+
+        charStack = []
+
+        # Consume all the characters that are in range while making sure we
+        # don't hit an EOF.
+        c = self.stream.char()
+        while c in allowed and c is not EOF:
+            charStack.append(c)
+            c = self.stream.char()
+
+        # Convert the characters consumed to an int (int("") raises, so at least one digit must be present).
+        charAsInt = int("".join(charStack), radix)
+
+        # Certain characters get replaced with others
+        if charAsInt in replacementCharacters:
+            char = replacementCharacters[charAsInt]
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "illegal-codepoint-for-numeric-entity",
+              "datavars": {"charAsInt": charAsInt}})
+        elif ((0xD800 <= charAsInt <= 0xDFFF) or 
+              (charAsInt > 0x10FFFF)):
+            char = u"\uFFFD"  # surrogates and out-of-range values become U+FFFD
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "illegal-codepoint-for-numeric-entity",
+              "datavars": {"charAsInt": charAsInt}})
+        else:
+            # These code points are still emitted as-is below, but reported as a ParseError. (Could be sped up by hoisting the set to a constant.)
+            if ((0x0001 <= charAsInt <= 0x0008) or 
+                (0x000E <= charAsInt <= 0x001F) or 
+                (0x007F  <= charAsInt <= 0x009F) or
+                (0xFDD0  <= charAsInt <= 0xFDEF) or 
+                charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, 
+                                        0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
+                                        0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 
+                                        0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE,
+                                        0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE,
+                                        0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 
+                                        0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, 
+                                        0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 
+                                        0xFFFFF, 0x10FFFE, 0x10FFFF])):
+                self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                        "data":
+                                            "illegal-codepoint-for-numeric-entity",
+                                        "datavars": {"charAsInt": charAsInt}})
+            try:
+                # Try/except needed as UCS-2 Python builds' unichr only works
+                # within the BMP.
+                char = unichr(charAsInt)
+            except ValueError:
+                char = eval("u'\\U%08x'" % charAsInt)  # literal is built only from the integer charAsInt
+
+        # Discard the ; if present. Otherwise, put the character back on the
+        # stream and queue a ParseError token.
+        if c != u";":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "numeric-entity-without-semicolon"})
+            self.stream.unget(c)
+
+        return char
+
+    def consumeEntity(self, allowedChar=None, fromAttribute=False):  # consume what follows "&"; emits the result, returns None
+        # Initialise to the default output for when no entity is matched
+        output = u"&"
+
+        charStack = [self.stream.char()]
+        if (charStack[0] in spaceCharacters or charStack[0] in (EOF, u"<", u"&") 
+            or (allowedChar is not None and allowedChar == charStack[0])):
+            self.stream.unget(charStack[0])  # not a reference: only the bare "&" is emitted
+
+        elif charStack[0] == u"#":
+            # Read the next character to see if it's hex or decimal
+            hex = False  # local flag; shadows the builtin hex() inside this method
+            charStack.append(self.stream.char())
+            if charStack[-1] in (u"x", u"X"):
+                hex = True
+                charStack.append(self.stream.char())
+
+            # charStack[-1] should be the first digit
+            if (hex and charStack[-1] in hexDigits) \
+             or (not hex and charStack[-1] in digits):
+                # At least one digit found, so consume the whole number
+                self.stream.unget(charStack[-1])
+                output = self.consumeNumberEntity(hex)
+            else:
+                # No digits found
+                self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                    "data": "expected-numeric-entity"})
+                self.stream.unget(charStack.pop())
+                output = u"&" + u"".join(charStack)  # emit "&#" / "&#x" literally
+
+        else:
+            # At this point in the process might have named entity. Entities
+            # are stored in the global variable "entities".
+            #
+            # Consume characters and compare to these to a substring of the
+            # entity names in the list until the substring no longer matches.
+            filteredEntityList = entitiesByFirstChar.get(charStack[0], [])
+
+            def entitiesStartingWith(name):
+                return [e for e in filteredEntityList if e.startswith(name)]
+
+            while (charStack[-1] is not EOF and
+                   entitiesStartingWith("".join(charStack))):
+                charStack.append(self.stream.char())
+
+            # At this point we have a string that starts with some characters
+            # that may match an entity
+            entityName = None
+
+            # Try to find the longest entity the string will match to take care
+            # of &noti for instance.
+            for entityLength in xrange(len(charStack)-1, 1, -1):  # prefixes from longest down to length 2
+                possibleEntityName = "".join(charStack[:entityLength])
+                if possibleEntityName in entities:
+                    entityName = possibleEntityName
+                    break
+
+            if entityName is not None:
+                if entityName[-1] != ";":
+                    self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+                      "named-entity-without-semicolon"})
+                if (entityName[-1] != ";" and fromAttribute and
+                    (charStack[entityLength] in asciiLetters or
+                     charStack[entityLength] in digits or
+                    charStack[entityLength] == "=")):
+                    self.stream.unget(charStack.pop())
+                    output = u"&" + u"".join(charStack)  # in an attribute, leave the text unexpanded
+                else:
+                    output = entities[entityName]
+                    self.stream.unget(charStack.pop())
+                    output += u"".join(charStack[entityLength:])  # keep characters consumed past the entity name
+            else:
+                self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+                  "expected-named-entity"})
+                self.stream.unget(charStack.pop())
+                output = u"&" + u"".join(charStack)
+
+        if fromAttribute:
+            self.currentToken["data"][-1][1] += output  # presumably data[-1] is the [name, value] pair being built -- confirm
+        else:
+            if output in spaceCharacters:
+                tokenType = "SpaceCharacters"
+            else:
+                tokenType = "Characters"
+            self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output})
+
+    def processEntityInAttribute(self, allowedChar):
+        """Consume a character reference found inside an attribute value; this
+        method replaces the need for "entityInAttributeValueState"."""
+        self.consumeEntity(allowedChar=allowedChar, fromAttribute=True)
+
+    def emitCurrentToken(self):
+        """Queue self.currentToken for output and switch back to the data state.
+        Tag names are lowercased when lowercaseElementName is set; end tags that
+        carry attributes or the self-closing flag get a ParseError queued first.
+        """
+        token = self.currentToken
+        # Tag tokens get normalised and checked before being queued
+        if (token["type"] in tagTokenTypes):
+            if self.lowercaseElementName:
+                token["name"] = token["name"].translate(asciiUpper2Lower)
+            if token["type"] == tokenTypes["EndTag"]:
+                if token["data"]:
+                    self.tokenQueue.append({"type":tokenTypes["ParseError"],
+                                            "data":"attributes-in-end-tag"})
+                if token["selfClosing"]:
+                    self.tokenQueue.append({"type":tokenTypes["ParseError"],
+                                            "data":"self-closing-flag-on-end-tag"})
+        self.tokenQueue.append(token)  # the token is queued even when errors were reported
+        self.state = self.dataState
+
+
+    # Below are the various tokenizer states worked out.
+
+    def dataState(self):  # returns False at EOF (ends tokenization), True otherwise
+        data = self.stream.char()
+        if data == "&":
+            self.state = self.entityDataState
+        elif data == "<":
+            self.state = self.tagOpenState
+        elif data == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data":"invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], 
+                                    "data": u"\u0000"})  # here NUL is passed through, not replaced by U+FFFD
+        elif data is EOF:
+            # Tokenization ends.
+            return False
+        elif data in spaceCharacters:
+            # Directly after emitting a token you switch back to the "data
+            # state". At that point spaceCharacters are important so they are
+            # emitted separately.
+            self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
+              data + self.stream.charsUntil(spaceCharacters, True)})
+            # No need to update lastFourChars here, since the first space will
+            # have already been appended to lastFourChars and will have broken
+            # any <!-- or --> sequences
+        else:
+            chars = self.stream.charsUntil((u"&", u"<", u"\u0000"))  # bulk-read up to the next special character
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": 
+              data + chars})
+        return True
+
+    def entityDataState(self):  # entered from dataState after "&"
+        self.consumeEntity()  # queues the resulting Characters/SpaceCharacters token
+        self.state = self.dataState
+        return True
+    
+    def rcdataState(self):  # returns False at EOF (ends tokenization), True otherwise
+        data = self.stream.char()
+        if data == "&":
+            self.state = self.characterReferenceInRcdata
+        elif data == "<":
+            self.state = self.rcdataLessThanSignState
+        elif data == EOF:
+            # Tokenization ends.
+            return False
+        elif data == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], 
+                                    "data": u"\uFFFD"})  # NUL is replaced by U+FFFD in RCDATA
+        elif data in spaceCharacters:
+            # Directly after emitting a token you switch back to the "data
+            # state". At that point spaceCharacters are important so they are
+            # emitted separately.
+            self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data":
+              data + self.stream.charsUntil(spaceCharacters, True)})
+            # No need to update lastFourChars here, since the first space will
+            # have already been appended to lastFourChars and will have broken
+            # any <!-- or --> sequences
+        else:
+            chars = self.stream.charsUntil((u"&", u"<", u"\u0000"))  # also stop at NUL so it reaches the U+FFFD branch above
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": 
+              data + chars})
+        return True
+
+    def characterReferenceInRcdata(self):  # entered from rcdataState after "&"
+        self.consumeEntity()  # queues the resulting Characters/SpaceCharacters token
+        self.state = self.rcdataState
+        return True
+    
+    def rawtextState(self):  # returns False at EOF (ends tokenization), True otherwise
+        data = self.stream.char()
+        if data == "<":
+            self.state = self.rawtextLessThanSignState
+        elif data == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], 
+                                    "data": u"\uFFFD"})  # NUL is replaced by U+FFFD
+        elif data == EOF:
+            # Tokenization ends.
+            return False
+        else:
+            chars = self.stream.charsUntil((u"<", u"\u0000"))  # "&" is not special here: no character references
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": 
+              data + chars})
+        return True
+    
+    def scriptDataState(self):  # returns False at EOF (ends tokenization), True otherwise
+        data = self.stream.char()
+        if data == "<":
+            self.state = self.scriptDataLessThanSignState
+        elif data == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], 
+                                    "data": u"\uFFFD"})  # NUL is replaced by U+FFFD
+        elif data == EOF:
+            # Tokenization ends.
+            return False
+        else:
+            chars = self.stream.charsUntil((u"<", u"\u0000"))  # "&" is not special here: no character references
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": 
+              data + chars})
+        return True
+    
+    def plaintextState(self):
+        """Process the next character while in the PLAINTEXT state."""
+        char = self.stream.char()
+        if char == EOF:
+            # Tokenization ends.
+            return False
+        if char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"\uFFFD"})
+        else:
+            # Everything up to the next NUL becomes one Characters token.
+            run = char + self.stream.charsUntil(u"\u0000")
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": run})
+        return True
+
+    def tagOpenState(self):
+        """Process the character read right after a "<".
+
+        "!" and "/" select the markup-declaration and close-tag states and an
+        ASCII letter opens a new StartTag token; any other character is
+        reported as a parse error.  Always returns True.
+        """
+        char = self.stream.char()
+        if char == u"!":
+            self.state = self.markupDeclarationOpenState
+        elif char == u"/":
+            self.state = self.closeTagOpenState
+        elif char in asciiLetters:
+            self.currentToken = {"type": tokenTypes["StartTag"], "name": char, "data": [],
+                                 "selfClosing": False, "selfClosingAcknowledged": False}
+            self.state = self.tagNameState
+        elif char == u">":
+            # "<>" cannot start a tag: report the error and emit both
+            # characters as ordinary text.
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "expected-tag-name-but-got-right-bracket"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<>"})
+            self.state = self.dataState
+        elif char == u"?":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "expected-tag-name-but-got-question-mark"})
+            self.stream.unget(char)
+            self.state = self.bogusCommentState
+        else:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "expected-tag-name"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"})
+            self.stream.unget(char)
+            self.state = self.dataState
+        return True
+
+    def closeTagOpenState(self):
+        """Process the character read right after "</"; always returns True."""
+        char = self.stream.char()
+        if char in asciiLetters:
+            self.currentToken = {"type": tokenTypes["EndTag"], "name": char,
+                                 "data": [], "selfClosing": False}
+            self.state = self.tagNameState
+        elif char == u">":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "expected-closing-tag-but-got-right-bracket"})
+            self.state = self.dataState
+        elif char is EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "expected-closing-tag-but-got-eof"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"</"})
+            self.state = self.dataState
+        else:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "expected-closing-tag-but-got-char",
+                                    "datavars": {"data": char}})
+            self.stream.unget(char)
+            self.state = self.bogusCommentState
+        return True
+
+    def tagNameState(self):
+        """Consume the next character of the current tag token's name."""
+        char = self.stream.char()
+        if char in spaceCharacters:
+            self.state = self.beforeAttributeNameState
+        elif char == u">":
+            self.emitCurrentToken()
+        elif char is EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "eof-in-tag-name"})
+            self.state = self.dataState
+        elif char == u"/":
+            self.state = self.selfClosingStartTagState
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.currentToken["name"] += u"\uFFFD"
+        else:
+            # Appending a single character (rather than scanning ahead with
+            # charsUntil) is deliberate: tag names are very short, so the
+            # simple approach is the faster one.
+            self.currentToken["name"] += char
+        return True
+    
+    def rcdataLessThanSignState(self):
+        char = self.stream.char()  # character right after a "<" met in RCDATA
+        if char != "/":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"})
+            self.stream.unget(char)  # not an end tag: reprocess char as RCDATA
+            self.state = self.rcdataState
+        else:
+            self.temporaryBuffer = ""  # start collecting the end tag name
+            self.state = self.rcdataEndTagOpenState
+        return True
+    
+    def rcdataEndTagOpenState(self):
+        char = self.stream.char()  # character right after "</" in RCDATA
+        if char not in asciiLetters:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"</"})
+            self.stream.unget(char)  # no tag name follows: reprocess char as RCDATA
+            self.state = self.rcdataState
+        else:
+            self.temporaryBuffer += char  # first letter of the end tag name
+            self.state = self.rcdataEndTagNameState
+        return True
+    
+    def rcdataEndTagNameState(self):
+        """Collect the name of a potential end tag found inside RCDATA.
+
+        Letters extend the buffered name.  Whitespace, "/" or ">" complete the
+        end tag when the name matches self.currentToken's (ignoring case);
+        anything else re-emits "</" plus the buffer as text.
+        """
+        matches = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
+        char = self.stream.char()
+        if matches and (char in spaceCharacters or char == "/" or char == ">"):
+            self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer,
+                                 "data": [], "selfClosing": False}
+            if char in spaceCharacters:
+                self.state = self.beforeAttributeNameState
+            elif char == "/":
+                self.state = self.selfClosingStartTagState
+            else:
+                self.emitCurrentToken()
+                self.state = self.dataState
+        elif char in asciiLetters:
+            self.temporaryBuffer += char
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"],
+                                    "data": u"</" + self.temporaryBuffer})
+            self.stream.unget(char)
+            self.state = self.rcdataState
+        return True
+    
+    def rawtextLessThanSignState(self):
+        char = self.stream.char()  # character right after a "<" met in RAWTEXT
+        if char != "/":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"})
+            self.stream.unget(char)  # not an end tag: reprocess char as RAWTEXT
+            self.state = self.rawtextState
+        else:
+            self.temporaryBuffer = ""  # start collecting the end tag name
+            self.state = self.rawtextEndTagOpenState
+        return True
+    
+    def rawtextEndTagOpenState(self):
+        char = self.stream.char()  # character right after "</" in RAWTEXT
+        if char not in asciiLetters:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"</"})
+            self.stream.unget(char)  # no tag name follows: reprocess char as RAWTEXT
+            self.state = self.rawtextState
+        else:
+            self.temporaryBuffer += char  # first letter of the end tag name
+            self.state = self.rawtextEndTagNameState
+        return True
+    
+    def rawtextEndTagNameState(self):
+        """Collect the name of a potential end tag found inside RAWTEXT.
+
+        Letters extend the buffered name.  Whitespace, "/" or ">" complete the
+        end tag when the name matches self.currentToken's (ignoring case);
+        anything else re-emits "</" plus the buffer as text.
+        """
+        matches = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
+        char = self.stream.char()
+        if matches and (char in spaceCharacters or char == "/" or char == ">"):
+            self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer,
+                                 "data": [], "selfClosing": False}
+            if char in spaceCharacters:
+                self.state = self.beforeAttributeNameState
+            elif char == "/":
+                self.state = self.selfClosingStartTagState
+            else:
+                self.emitCurrentToken()
+                self.state = self.dataState
+        elif char in asciiLetters:
+            self.temporaryBuffer += char
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"],
+                                    "data": u"</" + self.temporaryBuffer})
+            self.stream.unget(char)
+            self.state = self.rawtextState
+        return True
+    
+    def scriptDataLessThanSignState(self):
+        char = self.stream.char()  # character right after a "<" met in script data
+        if char == "!":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<!"})
+            self.state = self.scriptDataEscapeStartState
+        elif char == "/":
+            self.temporaryBuffer = ""  # start collecting the end tag name
+            self.state = self.scriptDataEndTagOpenState
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"})
+            self.stream.unget(char)  # reprocess char as ordinary script data
+            self.state = self.scriptDataState
+        return True
+    
+    def scriptDataEndTagOpenState(self):
+        char = self.stream.char()  # character right after "</" in script data
+        if char not in asciiLetters:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"</"})
+            self.stream.unget(char)  # no tag name follows: reprocess as script data
+            self.state = self.scriptDataState
+        else:
+            self.temporaryBuffer += char  # first letter of the end tag name
+            self.state = self.scriptDataEndTagNameState
+        return True
+    
+    def scriptDataEndTagNameState(self):
+        """Collect the name of a potential end tag found inside script data.
+
+        Letters extend the buffered name.  Whitespace, "/" or ">" complete the
+        end tag when the name matches self.currentToken's (ignoring case);
+        anything else re-emits "</" plus the buffer as text.
+        """
+        matches = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
+        char = self.stream.char()
+        if matches and (char in spaceCharacters or char == "/" or char == ">"):
+            self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer,
+                                 "data": [], "selfClosing": False}
+            if char in spaceCharacters:
+                self.state = self.beforeAttributeNameState
+            elif char == "/":
+                self.state = self.selfClosingStartTagState
+            else:
+                self.emitCurrentToken()
+                self.state = self.dataState
+        elif char in asciiLetters:
+            self.temporaryBuffer += char
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"],
+                                    "data": u"</" + self.temporaryBuffer})
+            self.stream.unget(char)
+            self.state = self.scriptDataState
+        return True
+    
+    def scriptDataEscapeStartState(self):
+        char = self.stream.char()  # follows the "<!" emitted in script data
+        if char != "-":
+            self.stream.unget(char)  # no dash: reprocess char as plain script data
+            self.state = self.scriptDataState
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"})
+            self.state = self.scriptDataEscapeStartDashState
+        return True
+    
+    def scriptDataEscapeStartDashState(self):
+        char = self.stream.char()  # follows the first "-" of the escape start
+        if char != "-":
+            self.stream.unget(char)  # no second dash: back to plain script data
+            self.state = self.scriptDataState
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"})
+            self.state = self.scriptDataEscapedDashDashState
+        return True
+    
+    def scriptDataEscapedState(self):
+        """Process the next character in the script data escaped state."""
+        char = self.stream.char()
+        if char == "-":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"})
+            self.state = self.scriptDataEscapedDashState
+        elif char == "<":
+            self.state = self.scriptDataEscapedLessThanSignState
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"\uFFFD"})
+        elif char == EOF:
+            # End of input: hand over to the data state.
+            self.state = self.dataState
+        else:
+            # Take the whole run up to the next "<", "-" or NUL as one token.
+            run = char + self.stream.charsUntil((u"<", u"-", u"\u0000"))
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": run})
+        return True
+    
+    def scriptDataEscapedDashState(self):
+        """Process the character after a single "-" in escaped script data."""
+        char = self.stream.char()
+        if char == "-":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"})
+            self.state = self.scriptDataEscapedDashDashState
+        elif char == "<":
+            self.state = self.scriptDataEscapedLessThanSignState
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"\uFFFD"})
+            self.state = self.scriptDataEscapedState
+        elif char == EOF:
+            self.state = self.dataState
+        else:
+            # Any other character ends the dash run.
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
+            self.state = self.scriptDataEscapedState
+        return True
+    
+    def scriptDataEscapedDashDashState(self):
+        """Process the character after "--" in escaped script data."""
+        char = self.stream.char()
+        if char == "-":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"})
+        elif char == "<":
+            self.state = self.scriptDataEscapedLessThanSignState
+        elif char == ">":
+            # ">" right after the dashes switches back to plain script data.
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u">"})
+            self.state = self.scriptDataState
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"\uFFFD"})
+            self.state = self.scriptDataEscapedState
+        elif char == EOF:
+            self.state = self.dataState
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
+            self.state = self.scriptDataEscapedState
+        return True
+    
+    def scriptDataEscapedLessThanSignState(self):
+        char = self.stream.char()  # character right after a "<" in escaped script data
+        if char == "/":
+            self.temporaryBuffer = ""  # start collecting the end tag name
+            self.state = self.scriptDataEscapedEndTagOpenState
+        elif char not in asciiLetters:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"})
+            self.stream.unget(char)
+            self.state = self.scriptDataEscapedState
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<" + char})
+            self.temporaryBuffer = char  # candidate name, compared with "script" later
+            self.state = self.scriptDataDoubleEscapeStartState
+        return True
+    
+    def scriptDataEscapedEndTagOpenState(self):
+        char = self.stream.char()  # character right after "</" in escaped script data
+        if char not in asciiLetters:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"</"})
+            self.stream.unget(char)  # no tag name follows: reprocess char as escaped
+            self.state = self.scriptDataEscapedState
+        else:
+            self.temporaryBuffer = char  # first letter of the end tag name
+            self.state = self.scriptDataEscapedEndTagNameState
+        return True
+    
+    def scriptDataEscapedEndTagNameState(self):
+        """Collect the name of a potential end tag in escaped script data.
+
+        Letters extend the buffered name.  Whitespace, "/" or ">" complete the
+        end tag when the name matches self.currentToken's (ignoring case);
+        anything else re-emits "</" plus the buffer as text.
+        """
+        matches = self.currentToken and self.currentToken["name"].lower() == self.temporaryBuffer.lower()
+        char = self.stream.char()
+        if matches and (char in spaceCharacters or char == "/" or char == ">"):
+            self.currentToken = {"type": tokenTypes["EndTag"], "name": self.temporaryBuffer,
+                                 "data": [], "selfClosing": False}
+            if char in spaceCharacters:
+                self.state = self.beforeAttributeNameState
+            elif char == "/":
+                self.state = self.selfClosingStartTagState
+            else:
+                self.emitCurrentToken()
+                self.state = self.dataState
+        elif char in asciiLetters:
+            self.temporaryBuffer += char
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"],
+                                    "data": u"</" + self.temporaryBuffer})
+            self.stream.unget(char)
+            self.state = self.scriptDataEscapedState
+        return True
+    
+    def scriptDataDoubleEscapeStartState(self):
+        char = self.stream.char()  # next character of the candidate tag name
+        if char in spaceCharacters or char in ("/", ">"):
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
+            if self.temporaryBuffer.lower() != "script":
+                self.state = self.scriptDataEscapedState
+            else:
+                self.state = self.scriptDataDoubleEscapedState  # name was "script"
+        elif char in asciiLetters:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
+            self.temporaryBuffer += char  # keep collecting the candidate name
+        else:
+            self.stream.unget(char)
+            self.state = self.scriptDataEscapedState
+        return True
+    
+    def scriptDataDoubleEscapedState(self):
+        """Process the next character in double-escaped script data."""
+        char = self.stream.char()
+        if char == "-":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"})
+            self.state = self.scriptDataDoubleEscapedDashState
+        elif char == "<":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"})
+            self.state = self.scriptDataDoubleEscapedLessThanSignState
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"\uFFFD"})
+        elif char == EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "eof-in-script-in-script"})
+            self.state = self.dataState
+        else:
+            # Ordinary script text is passed through one character at a time.
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
+        return True
+    
+    def scriptDataDoubleEscapedDashState(self):
+        """Process the character after a single "-" in double-escaped script data."""
+        char = self.stream.char()
+        if char == "-":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"})
+            self.state = self.scriptDataDoubleEscapedDashDashState
+        elif char == "<":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"})
+            self.state = self.scriptDataDoubleEscapedLessThanSignState
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"\uFFFD"})
+            self.state = self.scriptDataDoubleEscapedState
+        elif char == EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "eof-in-script-in-script"})
+            self.state = self.dataState
+        else:
+            # Any other character ends the dash run.
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
+            self.state = self.scriptDataDoubleEscapedState
+        return True
+    
+    def scriptDataDoubleEscapedDashDashState(self):
+        """Process the character after "--" in double-escaped script data."""
+        data = self.stream.char()  # reached from scriptDataDoubleEscapedDashState on a second "-"
+        if data == "-":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"-"})
+        elif data == "<":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"<"})
+            self.state = self.scriptDataDoubleEscapedLessThanSignState
+        elif data == ">":
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u">"})
+            self.state = self.scriptDataState
+        elif data == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"\uFFFD"})
+            self.state = self.scriptDataDoubleEscapedState
+        elif data == EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-script-in-script"})
+            self.state = self.dataState
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data})
+            self.state = self.scriptDataDoubleEscapedState
+        return True
+    
+    
+    def scriptDataDoubleEscapedLessThanSignState(self):
+        char = self.stream.char()  # right after a "<" in double-escaped script data
+        if char != "/":
+            self.stream.unget(char)  # reprocess char as double-escaped script data
+            self.state = self.scriptDataDoubleEscapedState
+        else:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": u"/"})
+            self.temporaryBuffer = ""  # start collecting the candidate tag name
+            self.state = self.scriptDataDoubleEscapeEndState
+        return True
+    
+    def scriptDataDoubleEscapeEndState(self):
+        char = self.stream.char()  # next character of the candidate tag name
+        if char in spaceCharacters or char in ("/", ">"):
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
+            if self.temporaryBuffer.lower() != "script":
+                self.state = self.scriptDataDoubleEscapedState
+            else:
+                self.state = self.scriptDataEscapedState  # name was "script"
+        elif char in asciiLetters:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], "data": char})
+            self.temporaryBuffer += char  # keep collecting the candidate name
+        else:
+            self.stream.unget(char)
+            self.state = self.scriptDataDoubleEscapedState
+        return True
+
+    def beforeAttributeNameState(self):
+        """Process the character that may begin an attribute name."""
+        char = self.stream.char()
+        if char in spaceCharacters:
+            self.stream.charsUntil(spaceCharacters, True)
+        elif char in asciiLetters:
+            self.currentToken["data"].append([char, ""])
+            self.state = self.attributeNameState
+        elif char == u">":
+            self.emitCurrentToken()
+        elif char == u"/":
+            self.state = self.selfClosingStartTagState
+        elif char in (u"'", u'"', u"=", u"<"):
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "invalid-character-in-attribute-name"})
+            self.currentToken["data"].append([char, ""])
+            self.state = self.attributeNameState
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.currentToken["data"].append([u"\uFFFD", ""])
+            self.state = self.attributeNameState
+        elif char is EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "expected-attribute-name-but-got-eof"})
+            self.state = self.dataState
+        else:
+            self.currentToken["data"].append([char, ""])
+            self.state = self.attributeNameState
+        return True
+
+    def attributeNameState(self):
+        """Accumulate the name of the attribute currently being parsed.
+
+        When the name is complete it is lower-cased (if self.lowercaseAttrName
+        is set) and compared with the token's earlier attributes so that a
+        "duplicate-attribute" parse error can be queued.  Always returns True.
+        """
+        char = self.stream.char()
+        attribute = self.currentToken["data"][-1]  # the [name, value] pair being built
+        nameComplete = True
+        emitToken = False
+        if char == u"=":
+            self.state = self.beforeAttributeValueState
+        elif char in asciiLetters:
+            attribute[0] += char + self.stream.charsUntil(asciiLetters, True)
+            nameComplete = False
+        elif char == u">":
+            # Emitting right away would turn the attribute list into a dict
+            # before the duplicate check below runs, so only flag it here.
+            emitToken = True
+        elif char in spaceCharacters:
+            self.state = self.afterAttributeNameState
+        elif char == u"/":
+            self.state = self.selfClosingStartTagState
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            attribute[0] += u"\uFFFD"
+            nameComplete = False
+        elif char in (u"'", u'"', u"<"):
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "invalid-character-in-attribute-name"})
+            attribute[0] += char
+            nameComplete = False
+        elif char is EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "eof-in-attribute-name"})
+            self.state = self.dataState
+        else:
+            attribute[0] += char
+            nameComplete = False
+
+        if nameComplete:
+            # Attributes are not dropped at this stage; that happens when the
+            # start tag token is emitted, so values can still be appended
+            # safely.  The parse error is reported now so it arrives in time.
+            if self.lowercaseAttrName:
+                attribute[0] = attribute[0].translate(asciiUpper2Lower)
+            earlierNames = [name for name, _ in self.currentToken["data"][:-1]]
+            if attribute[0] in earlierNames:
+                self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                        "data": "duplicate-attribute"})
+            if emitToken:
+                self.emitCurrentToken()
+        return True
+
+    def afterAttributeNameState(self):
+        """Process the character that follows a completed attribute name."""
+        char = self.stream.char()
+        if char in spaceCharacters:
+            self.stream.charsUntil(spaceCharacters, True)
+        elif char == u"=":
+            self.state = self.beforeAttributeValueState
+        elif char == u">":
+            self.emitCurrentToken()
+        elif char in asciiLetters:
+            self.currentToken["data"].append([char, ""])
+            self.state = self.attributeNameState
+        elif char == u"/":
+            self.state = self.selfClosingStartTagState
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.currentToken["data"].append([u"\uFFFD", ""])
+            self.state = self.attributeNameState
+        elif char in (u"'", u'"', u"<"):
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "invalid-character-after-attribute-name"})
+            self.currentToken["data"].append([char, ""])
+            self.state = self.attributeNameState
+        elif char is EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "expected-end-of-tag-but-got-eof"})
+            self.state = self.dataState
+        else:
+            self.currentToken["data"].append([char, ""])
+            self.state = self.attributeNameState
+        return True
+
+    def beforeAttributeValueState(self):
+        """Process the character that may begin an attribute value."""
+        char = self.stream.char()
+        if char in spaceCharacters:
+            self.stream.charsUntil(spaceCharacters, True)
+        elif char == u"\"":
+            self.state = self.attributeValueDoubleQuotedState
+        elif char == u"&":
+            self.state = self.attributeValueUnQuotedState
+            self.stream.unget(char)  # the unquoted state must read the "&" itself
+        elif char == u"'":
+            self.state = self.attributeValueSingleQuotedState
+        elif char == u">":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "expected-attribute-value-but-got-right-bracket"})
+            self.emitCurrentToken()
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.currentToken["data"][-1][1] += u"\uFFFD"
+            self.state = self.attributeValueUnQuotedState
+        elif char in (u"=", u"<", u"`"):
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "equals-in-unquoted-attribute-value"})
+            self.currentToken["data"][-1][1] += char
+            self.state = self.attributeValueUnQuotedState
+        elif char is EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "expected-attribute-value-but-got-eof"})
+            self.state = self.dataState
+        else:
+            self.currentToken["data"][-1][1] += char
+            self.state = self.attributeValueUnQuotedState
+        return True
+
+    def attributeValueDoubleQuotedState(self):
+        """Accumulate a double-quoted attribute value up to its closing quote."""
+        char = self.stream.char()
+        if char == "\"":
+            self.state = self.afterAttributeValueState
+        elif char == u"&":
+            self.processEntityInAttribute(u'"')
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.currentToken["data"][-1][1] += u"\uFFFD"
+        elif char is EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "eof-in-attribute-value-double-quote"})
+            self.state = self.dataState
+        else:
+            # Append this character together with the run charsUntil() returns.
+            self.currentToken["data"][-1][1] += char + self.stream.charsUntil(("\"", u"&"))
+        return True
+
+    def attributeValueSingleQuotedState(self):
+        """Accumulate a single-quoted attribute value up to its closing quote."""
+        char = self.stream.char()
+        if char == "'":
+            self.state = self.afterAttributeValueState
+        elif char == u"&":
+            self.processEntityInAttribute(u"'")
+        elif char == u"\u0000":
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": "invalid-codepoint"})
+            self.currentToken["data"][-1][1] += u"\uFFFD"
+        elif char is EOF:
+            self.tokenQueue.append({"type": tokenTypes["ParseError"],
+                                    "data": "eof-in-attribute-value-single-quote"})
+            self.state = self.dataState
+        else:
+            # Append this character together with the run charsUntil() returns.
+            self.currentToken["data"][-1][1] += char + self.stream.charsUntil(("'", u"&"))
+        return True
+
+    def attributeValueUnQuotedState(self):  # One step inside an unquoted attribute value; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:  # whitespace ends the value
+            self.state = self.beforeAttributeNameState
+        elif data == u"&":  # entity handling is delegated to a helper defined elsewhere
+            self.processEntityInAttribute(">")
+        elif data == u">":  # end of the tag: emit the token being built
+            self.emitCurrentToken()
+        elif data in (u'"', u"'", u"=", u"<", u"`"):  # reported as an error, but still kept in the value
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-character-in-unquoted-attribute-value"})
+            self.currentToken["data"][-1][1] += data
+        elif data == u"\u0000":  # NUL: queue a ParseError and append U+FFFD instead
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["data"][-1][1] += u"\uFFFD"
+        elif data is EOF:  # input ended inside the value: report it, go back to the data state
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-attribute-value-no-quotes"})
+            self.state = self.dataState
+        else:  # ordinary text: append it plus the run up to the next character handled above
+            self.currentToken["data"][-1][1] += data + self.stream.charsUntil(
+              frozenset((u"&", u">", u'"', u"'", u"=", u"<", u"`")) | spaceCharacters)
+        return True
+
+    def afterAttributeValueState(self):  # Decides what follows a quoted attribute value; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:
+            self.state = self.beforeAttributeNameState
+        elif data == u">":  # end of the tag: emit the token being built
+            self.emitCurrentToken()
+        elif data == u"/":
+            self.state = self.selfClosingStartTagState
+        elif data is EOF:  # report it, push the EOF marker back, return to the data state
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-EOF-after-attribute-value"})
+            self.stream.unget(data)
+            self.state = self.dataState
+        else:  # anything else is an error; push it back so the next state re-reads it
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-character-after-attribute-value"})
+            self.stream.unget(data)
+            self.state = self.beforeAttributeNameState
+        return True
+
+    def selfClosingStartTagState(self):  # Handles the character after "/" inside a tag; always returns True.
+        data = self.stream.char()
+        if data == ">":  # "/>": flag the token as self-closing and emit it
+            self.currentToken["selfClosing"] = True
+            self.emitCurrentToken()
+        elif data is EOF:  # report it, push the EOF marker back, return to the data state
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data":
+                                        "unexpected-EOF-after-solidus-in-tag"})
+            self.stream.unget(data)
+            self.state = self.dataState
+        else:  # anything else is an error; push it back and resume attribute parsing
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-character-after-soldius-in-tag"})
+            self.stream.unget(data)
+            self.state = self.beforeAttributeNameState
+        return True
+
+    def bogusCommentState(self):  # Emits a Comment token for malformed markup; always returns True.
+        # Make a new comment token and give it as value all the characters
+        # until the first > or EOF (charsUntil checks for EOF automatically)
+        # and emit it.
+        data = self.stream.charsUntil(u">")
+        data = data.replace(u"\u0000", u"\uFFFD")  # NULs become U+FFFD; no ParseError is queued here
+        self.tokenQueue.append(
+          {"type": tokenTypes["Comment"], "data": data})
+
+        # Eat the character directly after the bogus comment which is either a
+        # ">" or an EOF.
+        self.stream.char()
+        self.state = self.dataState
+        return True
+
+    def markupDeclarationOpenState(self):  # Picks between comment, DOCTYPE, CDATA section and bogus comment; always returns True.
+        charStack = [self.stream.char()]  # everything read here is kept so it can be pushed back on failure
+        if charStack[-1] == u"-":
+            charStack.append(self.stream.char())
+            if charStack[-1] == u"-":  # "--": start an empty Comment token
+                self.currentToken = {"type": tokenTypes["Comment"], "data": u""}
+                self.state = self.commentStartState
+                return True
+        elif charStack[-1] in (u'd', u'D'):
+            matched = True
+            for expected in ((u'o', u'O'), (u'c', u'C'), (u't', u'T'),  # rest of "doctype", either case
+                             (u'y', u'Y'), (u'p', u'P'), (u'e', u'E')):
+                charStack.append(self.stream.char())
+                if charStack[-1] not in expected:
+                    matched = False
+                    break
+            if matched:  # start a Doctype token with no name and no identifiers yet
+                self.currentToken = {"type": tokenTypes["Doctype"],
+                                     "name": u"",
+                                     "publicId": None, "systemId": None, 
+                                     "correct": True}
+                self.state = self.doctypeState
+                return True
+        elif (charStack[-1] == "[" and  # CDATA is tried only with a parser whose innermost open element is outside the default namespace
+              self.parser is not None and
+              self.parser.tree.openElements and
+              self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace):
+            matched = True
+            for expected in ["C", "D", "A", "T", "A", "["]:  # exact, case-sensitive match
+                charStack.append(self.stream.char())
+                if charStack[-1] != expected:
+                    matched = False
+                    break
+            if matched:
+                self.state = self.cdataSectionState
+                return True
+
+        self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":  # reached only when none of the branches above returned
+          "expected-dashes-or-doctype"})
+
+        while charStack:  # push back in reverse order so the stream replays them in the original order
+            self.stream.unget(charStack.pop())
+        self.state = self.bogusCommentState
+        return True
+
+    def commentStartState(self):  # Handles the first character of a comment body; always returns True.
+        data = self.stream.char()
+        if data == "-":
+            self.state = self.commentStartDashState
+        elif data == u"\u0000":  # NUL: queue a ParseError and append U+FFFD; the state is not changed here
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["data"] += u"\uFFFD"
+        elif data == ">":  # comment closed immediately: report it and emit the token as it stands
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "incorrect-comment"})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: report it and emit what was collected
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-comment"})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # ordinary text starts the comment body
+            self.currentToken["data"] += data
+            self.state = self.commentState
+        return True
+    
+    def commentStartDashState(self):  # Handles the character after a "-" that began the comment body; always returns True.
+        data = self.stream.char()
+        if data == "-":
+            self.state = self.commentEndState
+        elif data == u"\u0000":  # NUL: the pending "-" is kept and U+FFFD is appended; the state is not changed here
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["data"] += u"-\uFFFD"
+        elif data == ">":  # comment closed early: report it and emit the token as it stands
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "incorrect-comment"})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: report it and emit what was collected
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-comment"})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # the pending "-" was ordinary text after all; keep it together with this character
+            self.currentToken["data"] += "-" + data
+            self.state = self.commentState
+        return True
+
+    
+    def commentState(self):  # Accumulates comment text; always returns True.
+        data = self.stream.char()
+        if data == u"-":  # possible start of the closing sequence
+            self.state = self.commentEndDashState
+        elif data == u"\u0000":  # NUL: queue a ParseError and append U+FFFD instead
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["data"] += u"\uFFFD"
+        elif data is EOF:  # input ended: report it and emit what was collected
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "eof-in-comment"})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # ordinary text: append it plus the run up to the next "-" or NUL
+            self.currentToken["data"] += data + \
+                self.stream.charsUntil((u"-", u"\u0000"))
+        return True
+
+    def commentEndDashState(self):  # Handles the character after a single "-" inside a comment; always returns True.
+        data = self.stream.char()
+        if data == u"-":  # "--" seen: the comment may be ending
+            self.state = self.commentEndState
+        elif data == u"\u0000":  # NUL: keep the pending "-", append U+FFFD, resume the comment body
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["data"] += u"-\uFFFD"
+            self.state = self.commentState
+        elif data is EOF:  # input ended: report it and emit what was collected
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-comment-end-dash"})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # the "-" was ordinary text; keep it together with this character
+            self.currentToken["data"] += u"-" + data
+            self.state = self.commentState
+        return True
+
+    def commentEndState(self):  # Handles the character after "--" inside a comment; always returns True.
+        data = self.stream.char()
+        if data == u">":  # "-->": emit the finished comment
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data == u"\u0000":  # NUL: keep the pending "--", append U+FFFD, resume the comment body
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["data"] += u"--\uFFFD"
+            self.state = self.commentState
+        elif data == "!":  # "--!": reported; the next state decides whether the comment ends
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-bang-after-double-dash-in-comment"})
+            self.state = self.commentEndBangState
+        elif data == u"-":  # a further "-": one dash goes into the text and the state is unchanged
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+             "unexpected-dash-after-double-dash-in-comment"})
+            self.currentToken["data"] += data
+        elif data is EOF:  # input ended: report it and emit what was collected
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-comment-double-dash"})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # "--" turned out to be ordinary text: report it and keep everything
+            # XXX
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-comment"})
+            self.currentToken["data"] += u"--" + data
+            self.state = self.commentState
+        return True
+
+    def commentEndBangState(self):  # Handles the character after "--!" inside a comment; always returns True.
+        data = self.stream.char()
+        if data == u">":  # "--!>": emit the comment; the "--!" is not added to its text
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data == u"-":  # keep "--!" as text and treat this "-" as a possible new ending
+            self.currentToken["data"] += "--!"
+            self.state = self.commentEndDashState
+        elif data == u"\u0000":  # NUL: keep "--!", append U+FFFD, resume the comment body
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["data"] += u"--!\uFFFD"
+            self.state = self.commentState
+        elif data is EOF:  # input ended: report it and emit what was collected
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-comment-end-bang-state"})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # "--!" was ordinary text; keep it together with this character
+            self.currentToken["data"] += u"--!" + data
+            self.state = self.commentState
+        return True
+
+    def doctypeState(self):  # Entered once the DOCTYPE keyword has matched; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:
+            self.state = self.beforeDoctypeNameState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "expected-doctype-name-but-got-eof"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # missing space: report it, then re-read the character in the next state
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "need-space-after-doctype"})
+            self.stream.unget(data)
+            self.state = self.beforeDoctypeNameState
+        return True
+
+    def beforeDoctypeNameState(self):  # Skips whitespace and starts the DOCTYPE name; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:  # leading whitespace is ignored
+            pass
+        elif data == u">":  # DOCTYPE closed without a name: emit it with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "expected-doctype-name-but-got-right-bracket"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data == u"\u0000":  # NUL: the name starts with U+FFFD instead
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["name"] = u"\uFFFD"
+            self.state = self.doctypeNameState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "expected-doctype-name-but-got-eof"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # first character of the name, stored as read
+            self.currentToken["name"] = data
+            self.state = self.doctypeNameState
+        return True
+
+    def doctypeNameState(self):  # Accumulates the DOCTYPE name; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:  # name finished; presumably lower-cases ASCII letters (table defined elsewhere)
+            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.state = self.afterDoctypeNameState
+        elif data == u">":  # name finished and DOCTYPE closed: translate the name and emit
+            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data == u"\u0000":  # NUL: queue a ParseError and append U+FFFD to the name
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["name"] += u"\uFFFD"
+            self.state = self.doctypeNameState
+        elif data is EOF:  # input ended: emit the translated name with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype-name"})
+            self.currentToken["correct"] = False
+            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # another character of the name; translation happens once the name ends
+            self.currentToken["name"] += data
+        return True
+
+    def afterDoctypeNameState(self):  # Looks for ">", PUBLIC or SYSTEM after the DOCTYPE name; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:  # whitespace is ignored
+            pass
+        elif data == u">":  # DOCTYPE closed: emit it
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: push the EOF marker back, emit with "correct" cleared
+            self.currentToken["correct"] = False
+            self.stream.unget(data)
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:
+            if data in (u"p", u"P"):  # try to match the rest of "public", either case
+                matched = True
+                for expected in ((u"u", u"U"), (u"b", u"B"), (u"l", u"L"),
+                                 (u"i", u"I"), (u"c", u"C")):
+                    data = self.stream.char()
+                    if data not in expected:
+                        matched = False
+                        break
+                if matched:
+                    self.state = self.afterDoctypePublicKeywordState
+                    return True
+            elif data in (u"s", u"S"):  # try to match the rest of "system", either case
+                matched = True
+                for expected in ((u"y", u"Y"), (u"s", u"S"), (u"t", u"T"),
+                                 (u"e", u"E"), (u"m", u"M")):
+                    data = self.stream.char()
+                    if data not in expected:
+                        matched = False
+                        break
+                if matched:
+                    self.state = self.afterDoctypeSystemKeywordState
+                    return True
+
+            # All the characters read before the current 'data' will be
+            # [a-zA-Z], so they're garbage in the bogus doctype and can be
+            # discarded; only the latest character might be '>' or EOF
+            # and needs to be ungetted
+            self.stream.unget(data)
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+                "expected-space-or-right-bracket-in-doctype", "datavars":  # "datavars" carries the offending character
+                {"data": data}})
+            self.currentToken["correct"] = False
+            self.state = self.bogusDoctypeState
+
+        return True
+    
+    def afterDoctypePublicKeywordState(self):  # Handles the character after the PUBLIC keyword; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:
+            self.state = self.beforeDoctypePublicIdentifierState
+        elif data in ("'", '"'):  # quote with no separating space: report it, then re-read it in the next state
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-doctype"})
+            self.stream.unget(data)
+            self.state = self.beforeDoctypePublicIdentifierState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # anything else is pushed back for the next state; no error is queued here
+            self.stream.unget(data)
+            self.state = self.beforeDoctypePublicIdentifierState
+        return True
+
+    def beforeDoctypePublicIdentifierState(self):  # Waits for the opening quote of the public identifier; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:  # whitespace is ignored
+            pass
+        elif data == "\"":  # opening quote: publicId goes from None to an empty string
+            self.currentToken["publicId"] = u""
+            self.state = self.doctypePublicIdentifierDoubleQuotedState
+        elif data == "'":  # opening quote: publicId goes from None to an empty string
+            self.currentToken["publicId"] = u""
+            self.state = self.doctypePublicIdentifierSingleQuotedState
+        elif data == ">":  # DOCTYPE closed before any identifier: emit with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-end-of-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # anything else: clear "correct" and skip the rest as a bogus DOCTYPE
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-doctype"})
+            self.currentToken["correct"] = False
+            self.state = self.bogusDoctypeState
+        return True
+
+    def doctypePublicIdentifierDoubleQuotedState(self):  # Accumulates a "..." public identifier; always returns True.
+        data = self.stream.char()
+        if data == "\"":  # closing quote ends the identifier
+            self.state = self.afterDoctypePublicIdentifierState
+        elif data == u"\u0000":  # NUL: queue a ParseError and append U+FFFD instead
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["publicId"] += u"\uFFFD"
+        elif data == ">":  # DOCTYPE closed inside the quotes: emit with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-end-of-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # ordinary character of the identifier
+            self.currentToken["publicId"] += data
+        return True
+
+    def doctypePublicIdentifierSingleQuotedState(self):  # Accumulates a '...' public identifier; always returns True.
+        data = self.stream.char()
+        if data == "'":  # closing quote ends the identifier
+            self.state = self.afterDoctypePublicIdentifierState
+        elif data == u"\u0000":  # NUL: queue a ParseError and append U+FFFD instead
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["publicId"] += u"\uFFFD"
+        elif data == ">":  # DOCTYPE closed inside the quotes: emit with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-end-of-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # ordinary character of the identifier
+            self.currentToken["publicId"] += data
+        return True
+
+    def afterDoctypePublicIdentifierState(self):  # Handles the character after the public identifier's closing quote; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:
+            self.state = self.betweenDoctypePublicAndSystemIdentifiersState
+        elif data == ">":  # DOCTYPE closed: emit it
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data == '"':  # system identifier starts with no separating space: reported, but still accepted
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-doctype"})
+            self.currentToken["systemId"] = u""
+            self.state = self.doctypeSystemIdentifierDoubleQuotedState
+        elif data == "'":  # same as above for a single-quoted system identifier
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-doctype"})
+            self.currentToken["systemId"] = u""
+            self.state = self.doctypeSystemIdentifierSingleQuotedState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # anything else: clear "correct" and skip the rest as a bogus DOCTYPE
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-doctype"})
+            self.currentToken["correct"] = False
+            self.state = self.bogusDoctypeState
+        return True
+    
+    def betweenDoctypePublicAndSystemIdentifiersState(self):  # Handles whitespace between the public and system identifiers; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:  # further whitespace is ignored
+            pass
+        elif data == ">":  # DOCTYPE closed with no system identifier: emit it
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data == '"':  # opening quote: systemId goes from None to an empty string
+            self.currentToken["systemId"] = u""
+            self.state = self.doctypeSystemIdentifierDoubleQuotedState
+        elif data == "'":  # opening quote: systemId goes from None to an empty string
+            self.currentToken["systemId"] = u""
+            self.state = self.doctypeSystemIdentifierSingleQuotedState
+        elif data is EOF:  # identity test, as in every other state; emit with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # anything else: clear "correct" and skip the rest as a bogus DOCTYPE
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-doctype"})
+            self.currentToken["correct"] = False
+            self.state = self.bogusDoctypeState
+        return True
+    
+    def afterDoctypeSystemKeywordState(self):  # Handles the character after the SYSTEM keyword; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:
+            self.state = self.beforeDoctypeSystemIdentifierState
+        elif data in ("'", '"'):  # quote with no separating space: report it, then re-read it in the next state
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-doctype"})
+            self.stream.unget(data)
+            self.state = self.beforeDoctypeSystemIdentifierState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # anything else is pushed back for the next state; no error is queued here
+            self.stream.unget(data)
+            self.state = self.beforeDoctypeSystemIdentifierState
+        return True
+    
+    def beforeDoctypeSystemIdentifierState(self):  # Waits for the opening quote of the system identifier; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:  # whitespace is ignored
+            pass
+        elif data == "\"":  # opening quote: systemId goes from None to an empty string
+            self.currentToken["systemId"] = u""
+            self.state = self.doctypeSystemIdentifierDoubleQuotedState
+        elif data == "'":  # opening quote: systemId goes from None to an empty string
+            self.currentToken["systemId"] = u""
+            self.state = self.doctypeSystemIdentifierSingleQuotedState
+        elif data == ">":  # DOCTYPE closed before any identifier: emit with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # anything else: clear "correct" and skip the rest as a bogus DOCTYPE
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-doctype"})
+            self.currentToken["correct"] = False
+            self.state = self.bogusDoctypeState
+        return True
+
+    def doctypeSystemIdentifierDoubleQuotedState(self):  # Accumulates a "..." system identifier; always returns True.
+        data = self.stream.char()
+        if data == "\"":  # closing quote ends the identifier
+            self.state = self.afterDoctypeSystemIdentifierState
+        elif data == u"\u0000":  # NUL: queue a ParseError and append U+FFFD instead
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["systemId"] += u"\uFFFD"
+        elif data == ">":  # DOCTYPE closed inside the quotes: emit with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-end-of-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # ordinary character of the identifier
+            self.currentToken["systemId"] += data
+        return True
+
+    def doctypeSystemIdentifierSingleQuotedState(self):  # Accumulates a '...' system identifier; always returns True.
+        data = self.stream.char()
+        if data == "'":  # closing quote ends the identifier
+            self.state = self.afterDoctypeSystemIdentifierState
+        elif data == u"\u0000":  # NUL: queue a ParseError and append U+FFFD instead
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                    "data": "invalid-codepoint"})
+            self.currentToken["systemId"] += u"\uFFFD"
+        elif data == ">":  # DOCTYPE closed inside the quotes: emit with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-end-of-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # ordinary character of the identifier
+            self.currentToken["systemId"] += data
+        return True
+
+    def afterDoctypeSystemIdentifierState(self):  # Handles the character after the system identifier's closing quote; always returns True.
+        data = self.stream.char()
+        if data in spaceCharacters:  # trailing whitespace is ignored
+            pass
+        elif data == ">":  # DOCTYPE closed: emit it
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: emit the token with "correct" cleared
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "eof-in-doctype"})
+            self.currentToken["correct"] = False
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # trailing junk: reported and skipped, but "correct" is left untouched on this path
+            self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
+              "unexpected-char-in-doctype"})
+            self.state = self.bogusDoctypeState
+        return True
+
+    def bogusDoctypeState(self):  # Discards characters until ">" or EOF, then emits the DOCTYPE token; always returns True.
+        data = self.stream.char()
+        if data == u">":  # end of the malformed DOCTYPE: emit the token as it stands
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        elif data is EOF:  # input ended: push the EOF marker back and emit; no ParseError is queued here
+            # XXX EMIT
+            self.stream.unget(data)
+            self.tokenQueue.append(self.currentToken)
+            self.state = self.dataState
+        else:  # every other character is dropped
+            pass
+        return True
+
+    def cdataSectionState(self):
+        # Gather everything up to the first "]]>" (or EOF), emit it as one
+        # Characters token and return to the data state.  Always returns True.
+        data = []
+        while True:
+            data.append(self.stream.charsUntil(u"]"))
+            # Taking the whole run up to ">" keeps every "]" of e.g. "]]]>" in
+            # one chunk, so the terminator is found even after extra brackets.
+            data.append(self.stream.charsUntil(u">"))
+            char = self.stream.char()
+            if char is EOF:
+                break
+            if data[-1][-2:] == u"]]":
+                # Terminator found: drop its two brackets and stop.
+                data[-1] = data[-1][:-2]
+                break
+            # A ">" that does not close the section is ordinary content.
+            data.append(char)
+        data = "".join(data)
+        #Deal with null here rather than in the parser
+        # (one ParseError is queued for each NUL before it is replaced)
+        nullCount = data.count(u"\u0000")
+        if nullCount > 0:
+            for _ in xrange(nullCount):
+                self.tokenQueue.append({"type": tokenTypes["ParseError"], 
+                                        "data": "invalid-codepoint"})
+            data = data.replace(u"\u0000", u"\uFFFD")
+        if data:
+            self.tokenQueue.append({"type": tokenTypes["Characters"], 
+                                    "data": data})
+        self.state = self.dataState
+        return True
diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
new file mode 100755
index 00000000..14f66d40
--- /dev/null
+++ b/html5lib/treebuilders/__init__.py
@@ -0,0 +1,96 @@
+"""A collection of modules for building different kinds of tree from
+HTML documents.
+
+To create a treebuilder for a new type of tree, you need to
+implement several things:
+
+1) A set of classes for various types of elements: Document, Doctype,
+Comment, Element. These must implement the interface of
+treebuilders._base.Node (although comment nodes have a different
+signature for their constructor, see treebuilders.simpletree.Comment).
+Textual content may also be implemented as another node type, or not, as
+your tree implementation requires.
+
+2) A treebuilder object (called TreeBuilder by convention) that
+inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
+documentClass - the class to use for the bottommost node of a document
+elementClass - the class to use for HTML Elements
+commentClass - the class to use for comments
+doctypeClass - the class to use for doctypes
+It also has one required method:
+getDocument - Returns the root node of the complete document tree
+
+3) If you wish to run the unit tests, you must also create a
+testSerializer method on your treebuilder which accepts a node and
+returns a string containing Node and its children serialized according
+to the format used in the unittests
+
+The supplied simpletree module provides a python-only implementation
+of a full treebuilder and is a useful reference for the semantics of
+the various methods.
+"""
+
+treeBuilderCache = {}
+
+import sys
+
+def getTreeBuilder(treeType, implementation=None, **kwargs):
+    """Return the TreeBuilder class for the requested kind of tree.
+
+    treeType - name of the tree type (case-insensitive); one of:
+               "simpletree" - a built-in DOM-ish tree type with support for
+                              some more pythonic idioms.
+               "dom" - a generic builder for DOM implementations; defaults to
+                       xml.dom.minidom for backwards compatibility (releases
+                       up to 0.10 had a minidom-only builder called "dom").
+               "etree" - a generic builder for trees with an elementtree-like
+                         interface (known to work with ElementTree,
+                         cElementTree and lxml.etree).
+               "lxml" - the builder from the etree_lxml module.
+               "beautifulsoup" - Beautiful Soup (if installed).
+    implementation - (only used for "etree" and "dom") a module implementing
+                     the tree type, e.g. xml.etree.ElementTree or lxml.etree.
+    kwargs - forwarded to dom.getDomModule / etree.getETreeModule.
+
+    Raises ValueError for an unrecognised treeType. "dom" and "etree" builders
+    are cached by their own submodules, the others in treeBuilderCache."""
+
+    treeType = treeType.lower()
+    if treeType not in treeBuilderCache:
+        if treeType == "dom":
+            import dom
+            # XXX: Keep backwards compatibility by using minidom if no implementation is given
+            if implementation is None:
+                from xml.dom import minidom
+                implementation = minidom
+            # XXX: NEVER cache here, caching is done in the dom submodule
+            return dom.getDomModule(implementation, **kwargs).TreeBuilder
+        elif treeType == "simpletree":
+            import simpletree
+            treeBuilderCache[treeType] = simpletree.TreeBuilder
+        elif treeType == "beautifulsoup":
+            import soup
+            treeBuilderCache[treeType] = soup.TreeBuilder
+        elif treeType == "lxml":
+            import etree_lxml
+            treeBuilderCache[treeType] = etree_lxml.TreeBuilder
+        elif treeType == "etree":
+            # Come up with a sane default
+            if implementation is None:
+                try:
+                    import xml.etree.cElementTree as ET
+                except ImportError:
+                    try:
+                        import xml.etree.ElementTree as ET
+                    except ImportError:
+                        try:
+                            import cElementTree as ET
+                        except ImportError:
+                            import elementtree.ElementTree as ET
+                implementation = ET
+            import etree
+            # NEVER cache here, caching is done in the etree submodule
+            return etree.getETreeModule(implementation, **kwargs).TreeBuilder
+        else:
+            raise ValueError("""Unrecognised treebuilder "%s" """%treeType)
+    return treeBuilderCache.get(treeType)
diff --git a/html5lib/treebuilders/_base.py b/html5lib/treebuilders/_base.py
new file mode 100755
index 00000000..f3782d28
--- /dev/null
+++ b/html5lib/treebuilders/_base.py
@@ -0,0 +1,377 @@
+from html5lib.constants import scopingElements, tableInsertModeElements, namespaces
+try:
+    frozenset
+except NameError:
+    # Import from the sets module for python 2.3
+    from sets import Set as set
+    from sets import ImmutableSet as frozenset
+
+# The scope markers are inserted when entering object elements,
+# marquees, table cells, and table captions, and are used to prevent formatting
+# from "leaking" into tables, object elements, and marquees.
+Marker = None
+
+class Node(object):
+    def __init__(self, name):
+        """Node representing an item in the tree.
+        name - The tag name associated with the node (the only argument)
+        parent - The parent of the current node (or None for the document node)
+        value - The value of the current node (applies to text nodes and
+        comments)
+        attributes - a dict holding name, value pairs for attributes of the node
+        childNodes - a list of child nodes of the current node. This must
+        include all elements but not necessarily other node types
+        _flags - A list of miscellaneous flags that can be set on the node
+        """
+        self.name = name
+        self.parent = None
+        self.value = None
+        self.attributes = {}
+        self.childNodes = []
+        self._flags = []
+
+    def __unicode__(self):
+        attributesStr =  " ".join(["%s=\"%s\""%(name, value) 
+                                   for name, value in 
+                                   self.attributes.iteritems()])
+        if attributesStr:
+            return "<%s %s>"%(self.name,attributesStr)
+        else:
+            return "<%s>"%(self.name)
+
+    def __repr__(self):
+        return "<%s>" % (self.name)
+
+    def appendChild(self, node):
+        """Insert node as a child of the current node
+        """
+        raise NotImplementedError
+
+    def insertText(self, data, insertBefore=None):
+        """Insert data as text in the current node, positioned before the 
+        start of node insertBefore or to the end of the node's text.
+        """
+        raise NotImplementedError
+
+    def insertBefore(self, node, refNode):
+        """Insert node as a child of the current node, before refNode in the 
+        list of child nodes. Raises ValueError if refNode is not a child of 
+        the current node"""
+        raise NotImplementedError
+
+    def removeChild(self, node):
+        """Remove node from the children of the current node
+        """
+        raise NotImplementedError
+
+    def reparentChildren(self, newParent):
+        """Move all the children of the current node to newParent. 
+        This is needed so that trees that don't store text as nodes move the 
+        text in the correct way
+        """
+        #XXX - should this method be made more general?
+        for child in self.childNodes:
+            newParent.appendChild(child)
+        self.childNodes = []
+
+    def cloneNode(self):
+        """Return a shallow copy of the current node i.e. a node with the same
+        name and attributes but with no parent or child nodes
+        """
+        raise NotImplementedError
+
+
+    def hasContent(self):
+        """Return true if the node has children or text, false otherwise
+        """
+        raise NotImplementedError
+
+class ActiveFormattingElements(list):
+    """List of active formatting elements, capped at 3 equal entries per scope."""
+    def append(self, node):
+        """Append node; if three equal elements already follow the last
+        Marker, drop the earliest of them first."""
+        if node != Marker:
+            matches = 0
+            # Walk back from the newest entry to the last scope Marker.
+            for candidate in self[::-1]:
+                if candidate == Marker:
+                    break
+                if self.nodesEqual(candidate, node):
+                    matches += 1
+                if matches == 3:
+                    self.remove(candidate)
+                    break
+        list.append(self, node)
+
+    def nodesEqual(self, node1, node2):
+        """Return True when both nodes share nameTuple and attributes."""
+        same_name = node1.nameTuple == node2.nameTuple
+        return bool(same_name and node1.attributes == node2.attributes)
+
+class TreeBuilder(object):
+    """Base treebuilder implementation
+    documentClass - the class to use for the bottommost node of a document
+    elementClass - the class to use for HTML Elements
+    commentClass - the class to use for comments
+    doctypeClass - the class to use for doctypes
+    """
+
+    #Document class
+    documentClass = None
+
+    #The class to use for creating a node
+    elementClass = None
+
+    #The class to use for creating comments
+    commentClass = None
+
+    #The class to use for creating doctypes
+    doctypeClass = None
+    
+    #Fragment class
+    fragmentClass = None
+
+    def __init__(self, namespaceHTMLElements):
+        if namespaceHTMLElements:
+            self.defaultNamespace = "http://www.w3.org/1999/xhtml"
+        else:
+            self.defaultNamespace = None
+        self.reset()
+    
+    def reset(self):
+        self.openElements = []
+        self.activeFormattingElements = ActiveFormattingElements()
+
+        #XXX - rename these to headElement, formElement
+        self.headPointer = None
+        self.formPointer = None
+
+        self.insertFromTable = False
+
+        self.document = self.documentClass()
+
+    def elementInScope(self, target, variant=None):
+        """Return True if target is in scope of the given variant, else False."""
+        #If we pass a node in we match that. if we pass a string
+        #match any node with that name
+        exactNode = hasattr(target, "nameTuple")
+        # variant -> (collection of nameTuples, invert flag)
+        listElementsMap = {
+            None:(scopingElements, False),
+            "button":(scopingElements | set([(namespaces["html"], "button")]), False),
+            "list":(scopingElements | set([(namespaces["html"], "ol"),
+                                           (namespaces["html"], "ul")]), False),
+            "table":(set([(namespaces["html"], "html"),
+                          (namespaces["html"], "table")]), False),
+            "select":(set([(namespaces["html"], "optgroup"), 
+                           (namespaces["html"], "option")]), True)
+            }
+        listElements, invert = listElementsMap[variant]
+        # Newest open element first; the first boundary node ends the search.
+        for node in reversed(self.openElements):
+            if (node.name == target and not exactNode or
+                node == target and exactNode):
+                return True
+            elif (invert ^ (node.nameTuple in listElements)):  # listed (or, if inverted, unlisted) node
+                return False
+
+        assert False # We should never reach this point
+
+    def reconstructActiveFormattingElements(self):
+        # Within this algorithm the order of steps described in the
+        # specification is not quite the same as the order of steps in the
+        # code. It should still do the same though.
+
+        # Step 1: stop the algorithm when there's nothing to do.
+        if not self.activeFormattingElements:
+            return
+
+        # Step 2 and step 3: we start with the last element. So i is -1.
+        i = len(self.activeFormattingElements) - 1
+        entry = self.activeFormattingElements[i]
+        if entry == Marker or entry in self.openElements:
+            return
+
+        # Step 6
+        while entry != Marker and entry not in self.openElements:
+            if i == 0:
+                #This will be reset to 0 below
+                i = -1
+                break
+            i -= 1
+            # Step 5: let entry be one earlier in the list.
+            entry = self.activeFormattingElements[i]
+
+        while True:
+            # Step 7
+            i += 1
+
+            # Step 8
+            entry = self.activeFormattingElements[i]
+            clone = entry.cloneNode() #Mainly to get a new copy of the attributes
+
+            # Step 9
+            element = self.insertElement({"type":"StartTag", 
+                                          "name":clone.name, 
+                                          "namespace":clone.namespace, 
+                                          "data":clone.attributes})
+
+            # Step 10
+            self.activeFormattingElements[i] = element
+
+            # Step 11
+            if element == self.activeFormattingElements[-1]:
+                break
+
+    def clearActiveFormattingElements(self):
+        entry = self.activeFormattingElements.pop()
+        while self.activeFormattingElements and entry != Marker:
+            entry = self.activeFormattingElements.pop()
+
+    def elementInActiveFormattingElements(self, name):
+        """Check if an element exists between the end of the active
+        formatting elements and the last marker. If it does, return it, else
+        return false"""
+
+        for item in self.activeFormattingElements[::-1]:
+            # Check for Marker first because if it's a Marker it doesn't have a
+            # name attribute.
+            if item == Marker:
+                break
+            elif item.name == name:
+                return item
+        return False
+
+    def insertRoot(self, token):
+        element = self.createElement(token)
+        self.openElements.append(element)
+        self.document.appendChild(element)
+
+    def insertDoctype(self, token):
+        name = token["name"]
+        publicId = token["publicId"]
+        systemId = token["systemId"]
+
+        doctype = self.doctypeClass(name, publicId, systemId)
+        self.document.appendChild(doctype)
+
+    def insertComment(self, token, parent=None):
+        if parent is None:
+            parent = self.openElements[-1]
+        parent.appendChild(self.commentClass(token["data"]))
+                           
+    def createElement(self, token):
+        """Create an element but don't insert it anywhere"""
+        name = token["name"]
+        namespace = token.get("namespace", self.defaultNamespace)
+        element = self.elementClass(name, namespace)
+        element.attributes = token["data"]
+        return element
+
+    def _getInsertFromTable(self):
+        return self._insertFromTable
+
+    def _setInsertFromTable(self, value):
+        """Switch the function used to insert an element from the
+        normal one to the misnested table one and back again"""
+        self._insertFromTable = value
+        if value:
+            self.insertElement = self.insertElementTable
+        else:
+            self.insertElement = self.insertElementNormal
+
+    insertFromTable = property(_getInsertFromTable, _setInsertFromTable)
+        
+    def insertElementNormal(self, token):
+        name = token["name"]
+        assert type(name) == unicode, "Element %s not unicode"%name
+        namespace = token.get("namespace", self.defaultNamespace)
+        element = self.elementClass(name, namespace)
+        element.attributes = token["data"]
+        self.openElements[-1].appendChild(element)
+        self.openElements.append(element)
+        return element
+
+    def insertElementTable(self, token):
+        """Create an element for token, insert it into the tree (foster
+        parented if the current node requires it) and return it."""
+        if self.openElements[-1].name not in tableInsertModeElements:
+            return self.insertElementNormal(token)
+        # We should be in the InTable mode: do the special element rearranging.
+        # The element is only built here so the normal path makes just one.
+        element = self.createElement(token)
+        parent, insertBefore = self.getTableMisnestedNodePosition()
+        if insertBefore is None:
+            parent.appendChild(element)
+        else:
+            parent.insertBefore(element, insertBefore)
+        self.openElements.append(element)
+        return element
+
+    def insertText(self, data, parent=None):
+        """Insert text data into parent (default: the current node), or at
+        the foster-parent position when inserting from a table context."""
+        if parent is None:
+            parent = self.openElements[-1]
+        fosterParenting = (self.insertFromTable and
+                           self.openElements[-1].name in tableInsertModeElements)
+        if fosterParenting:
+            # We should be in the InTable mode: the text goes where
+            # getTableMisnestedNodePosition says, not into parent.
+            parent, insertBefore = self.getTableMisnestedNodePosition()
+            parent.insertText(data, insertBefore)
+        else:
+            parent.insertText(data)
+            
+    def getTableMisnestedNodePosition(self):
+        """Get the foster parent element, and sibling to insert before
+        (or None) when inserting a misnested table node"""
+        # The foster parent element is the one which comes before the most
+        # recently opened table element
+        # XXX - this is really inelegant
+        lastTable=None
+        fosterParent = None
+        insertBefore = None
+        for elm in self.openElements[::-1]:
+            if elm.name == "table":
+                lastTable = elm
+                break
+        if lastTable:
+            # XXX - we should really check that this parent is actually a
+            # node here
+            if lastTable.parent:
+                fosterParent = lastTable.parent
+                insertBefore = lastTable
+            else:
+                fosterParent = self.openElements[
+                    self.openElements.index(lastTable) - 1]
+        else:
+            fosterParent = self.openElements[0]
+        return fosterParent, insertBefore
+
+    def generateImpliedEndTags(self, exclude=None):
+        """Pop open elements with implied end tags, stopping at exclude."""
+        implied = frozenset(("dd", "dt", "li", "option", "optgroup", "p",
+                             "rp", "rt"))
+        # XXX This is not entirely what the specification says. We should
+        # investigate it more closely.
+        while (self.openElements[-1].name in implied and
+               self.openElements[-1].name != exclude):
+            self.openElements.pop()
+
+    def getDocument(self):
+        "Return the final tree"
+        return self.document
+    
+    def getFragment(self):
+        "Return the final fragment"
+        #assert self.innerHTML
+        fragment = self.fragmentClass()
+        self.openElements[0].reparentChildren(fragment)
+        return fragment
+
+    def testSerializer(self, node):
+        """Serialize the subtree of node in the format required by unit tests
+        node - the node from which to start serializing"""
+        raise NotImplementedError
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
new file mode 100644
index 00000000..9578da2b
--- /dev/null
+++ b/html5lib/treebuilders/dom.py
@@ -0,0 +1,291 @@
+
+from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
+try:
+    from types import ModuleType
+except:
+    from new import module as ModuleType
+import re
+import weakref
+
+import _base
+from html5lib import constants, ihatexml
+from html5lib.constants import namespaces
+
+moduleCache = {}
+
+def getDomModule(DomImplementation):
+    """Return the builder module for DomImplementation, creating it with
+    getDomBuilder and caching it in moduleCache on first use."""
+    name = "_" + DomImplementation.__name__+"builder"
+    if name not in moduleCache:
+        # Expose everything getDomBuilder returns as module attributes.
+        module = ModuleType(name)
+        module.__dict__.update(getDomBuilder(DomImplementation))
+        moduleCache[name] = module
+    return moduleCache[name]
+
+def getDomBuilder(DomImplementation):
+    Dom = DomImplementation
+    class AttrList(object):
+        def __init__(self, element):
+            self.element = element
+        def __iter__(self):
+            return self.element.attributes.items().__iter__()
+        def __setitem__(self, name, value):
+            self.element.setAttribute(name, value)
+        def __len__(self):
+            return len(self.element.attributes.items())
+        def items(self):
+            return [(item[0], item[1]) for item in
+                     self.element.attributes.items()]
+        def keys(self):
+            return self.element.attributes.keys()
+        def __getitem__(self, name):
+            return self.element.getAttribute(name)
+
+        def __contains__(self, name):
+            if isinstance(name, tuple):
+                raise NotImplementedError
+            else:
+                return self.element.hasAttribute(name)
+    
+    class NodeBuilder(_base.Node):
+        def __init__(self, element):
+            _base.Node.__init__(self, element.nodeName)
+            self.element = element
+
+        namespace = property(lambda self:hasattr(self.element, "namespaceURI")
+                             and self.element.namespaceURI or None)
+
+        def appendChild(self, node):
+            node.parent = self
+            self.element.appendChild(node.element)
+    
+        def insertText(self, data, insertBefore=None):
+            text = self.element.ownerDocument.createTextNode(data)
+            if insertBefore:
+                self.element.insertBefore(text, insertBefore.element)
+            else:
+                self.element.appendChild(text)
+    
+        def insertBefore(self, node, refNode):
+            self.element.insertBefore(node.element, refNode.element)
+            node.parent = self
+    
+        def removeChild(self, node):
+            if node.element.parentNode == self.element:
+                self.element.removeChild(node.element)
+            node.parent = None
+    
+        def reparentChildren(self, newParent):
+            while self.element.hasChildNodes():
+                child = self.element.firstChild
+                self.element.removeChild(child)
+                newParent.element.appendChild(child)
+            self.childNodes = []
+    
+        def getAttributes(self):
+            return AttrList(self.element)
+    
+        def setAttributes(self, attributes):
+            if attributes:
+                for name, value in attributes.items():
+                    if isinstance(name, tuple):
+                        if name[0] is not None:
+                            qualifiedName = (name[0] + ":" + name[1])
+                        else:
+                            qualifiedName = name[1]
+                        self.element.setAttributeNS(name[2], qualifiedName, 
+                                                    value)
+                    else:
+                        self.element.setAttribute(
+                            name, value)
+        attributes = property(getAttributes, setAttributes)
+    
+        def cloneNode(self):
+            return NodeBuilder(self.element.cloneNode(False))
+    
+        def hasContent(self):
+            return self.element.hasChildNodes()
+
+        def getNameTuple(self):
+            if self.namespace == None:
+                return namespaces["html"], self.name
+            else:
+                return self.namespace, self.name
+
+        nameTuple = property(getNameTuple)
+
+    class TreeBuilder(_base.TreeBuilder):
+        def documentClass(self):
+            self.dom = Dom.getDOMImplementation().createDocument(None,None,None)
+            return weakref.proxy(self)
+    
+        def insertDoctype(self, token):
+            name = token["name"]
+            publicId = token["publicId"]
+            systemId = token["systemId"]
+
+            domimpl = Dom.getDOMImplementation()
+            doctype = domimpl.createDocumentType(name, publicId, systemId)
+            self.document.appendChild(NodeBuilder(doctype))
+            if Dom == minidom:
+                doctype.ownerDocument = self.dom
+    
+        def elementClass(self, name, namespace=None):
+            if namespace is None and self.defaultNamespace is None:
+                node = self.dom.createElement(name)
+            else:
+                node = self.dom.createElementNS(namespace, name)
+
+            return NodeBuilder(node)
+            
+        def commentClass(self, data):
+            return NodeBuilder(self.dom.createComment(data))
+        
+        def fragmentClass(self):
+            return NodeBuilder(self.dom.createDocumentFragment())
+    
+        def appendChild(self, node):
+            self.dom.appendChild(node.element)
+    
+        def testSerializer(self, element):
+            return testSerializer(element)
+    
+        def getDocument(self):
+            return self.dom
+        
+        def getFragment(self):
+            return _base.TreeBuilder.getFragment(self).element
+    
+        def insertText(self, data, parent=None):
+            """Insert text, special-casing the document node as parent."""
+            if parent != self:
+                _base.TreeBuilder.insertText(self, data, parent)
+            else:
+                # HACK: allow text nodes as children of the document node
+                if hasattr(self.dom, '_child_node_types'):
+                    if Node.TEXT_NODE not in self.dom._child_node_types:
+                        self.dom._child_node_types=list(self.dom._child_node_types)
+                        self.dom._child_node_types.append(Node.TEXT_NODE)
+                self.dom.appendChild(self.dom.createTextNode(data))
+    
+        name = None
+    
+    def testSerializer(element):
+        element.normalize()
+        rv = []
+        def serializeElement(element, indent=0):
+            if element.nodeType == Node.DOCUMENT_TYPE_NODE:
+                if element.name:
+                    if element.publicId or element.systemId:
+                        publicId = element.publicId or ""
+                        systemId = element.systemId or ""
+                        rv.append( """|%s<!DOCTYPE %s "%s" "%s">"""%(
+                                ' '*indent, element.name, publicId, systemId))
+                    else:
+                        rv.append("|%s<!DOCTYPE %s>"%(' '*indent, element.name))
+                else:
+                    rv.append("|%s<!DOCTYPE >"%(' '*indent,))
+            elif element.nodeType == Node.DOCUMENT_NODE:
+                rv.append("#document")
+            elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
+                rv.append("#document-fragment")
+            elif element.nodeType == Node.COMMENT_NODE:
+                rv.append("|%s<!-- %s -->"%(' '*indent, element.nodeValue))
+            elif element.nodeType == Node.TEXT_NODE:
+                rv.append("|%s\"%s\"" %(' '*indent, element.nodeValue))
+            else:
+                if (hasattr(element, "namespaceURI") and
+                    element.namespaceURI != None):
+                    name = "%s %s"%(constants.prefixes[element.namespaceURI],
+                                    element.nodeName)
+                else:
+                    name = element.nodeName
+                rv.append("|%s<%s>"%(' '*indent, name))
+                if element.hasAttributes():
+                    attributes = []
+                    for i in range(len(element.attributes)):
+                        attr = element.attributes.item(i)
+                        name = attr.nodeName
+                        value = attr.value
+                        ns = attr.namespaceURI
+                        if ns:
+                            name = "%s %s"%(constants.prefixes[ns], attr.localName)
+                        else:
+                            name = attr.nodeName
+                        attributes.append((name, value))
+
+                    for name, value in sorted(attributes):
+                        rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
+            indent += 2
+            for child in element.childNodes:
+                serializeElement(child, indent)
+        serializeElement(element, 0)
+    
+        return "\n".join(rv)
+    
+    def dom2sax(node, handler, nsmap={'xml':XML_NAMESPACE}):
+      if node.nodeType == Node.ELEMENT_NODE:
+        if not nsmap:
+          handler.startElement(node.nodeName, node.attributes)
+          for child in node.childNodes: dom2sax(child, handler, nsmap)
+          handler.endElement(node.nodeName)
+        else:
+          attributes = dict(node.attributes.itemsNS()) 
+    
+          # gather namespace declarations
+          prefixes = []
+          for attrname in node.attributes.keys():
+            attr = node.getAttributeNode(attrname)
+            if (attr.namespaceURI == XMLNS_NAMESPACE or
+               (attr.namespaceURI == None and attr.nodeName.startswith('xmlns'))):
+              prefix = (attr.nodeName != 'xmlns' and attr.nodeName or None)
+              handler.startPrefixMapping(prefix, attr.nodeValue)
+              prefixes.append(prefix)
+              nsmap = nsmap.copy()
+              nsmap[prefix] = attr.nodeValue
+              del attributes[(attr.namespaceURI, attr.nodeName)]
+    
+          # apply namespace declarations
+          for attrname in node.attributes.keys():
+            attr = node.getAttributeNode(attrname)
+            if attr.namespaceURI == None and ':' in attr.nodeName:
+              prefix = attr.nodeName.split(':')[0]
+              if nsmap.has_key(prefix):
+                del attributes[(attr.namespaceURI, attr.nodeName)]
+                attributes[(nsmap[prefix],attr.nodeName)]=attr.nodeValue
+    
+          # SAX events
+          ns = node.namespaceURI or nsmap.get(None,None)
+          handler.startElementNS((ns,node.nodeName), node.nodeName, attributes)
+          for child in node.childNodes: dom2sax(child, handler, nsmap)
+          handler.endElementNS((ns, node.nodeName), node.nodeName)
+          for prefix in prefixes: handler.endPrefixMapping(prefix)
+    
+      elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
+        handler.characters(node.nodeValue)
+    
+      elif node.nodeType == Node.DOCUMENT_NODE:
+        handler.startDocument()
+        for child in node.childNodes: dom2sax(child, handler, nsmap)
+        handler.endDocument()
+    
+      elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
+        for child in node.childNodes: dom2sax(child, handler, nsmap)
+    
+      else:
+        # ATTRIBUTE_NODE
+        # ENTITY_NODE
+        # PROCESSING_INSTRUCTION_NODE
+        # COMMENT_NODE
+        # DOCUMENT_TYPE_NODE
+        # NOTATION_NODE
+        pass
+        
+    return locals()
+
+# Keep backwards compatibility with things that directly load 
+# classes/functions from this module
+for key, value in getDomModule(minidom).__dict__.items():
+	globals()[key] = value
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
new file mode 100755
index 00000000..95be4755
--- /dev/null
+++ b/html5lib/treebuilders/etree.py
@@ -0,0 +1,344 @@
+try:
+    from types import ModuleType
+except ImportError:  # fall back to the legacy "new" module
+    from new import module as ModuleType
+import re
+import types
+
+import _base
+from html5lib import ihatexml
+from html5lib import constants
+from html5lib.constants import namespaces
+
+tag_regexp = re.compile("{([^}]*)}(.*)")
+
+moduleCache = {}
+
+def getETreeModule(ElementTreeImplementation, fullTree=False):
+    name = "_" + ElementTreeImplementation.__name__+"builder"
+    if name in moduleCache:
+        return moduleCache[name]
+    else:
+        mod = ModuleType("_" + ElementTreeImplementation.__name__+"builder")
+        objs = getETreeBuilder(ElementTreeImplementation, fullTree)
+        mod.__dict__.update(objs)
+        moduleCache[name] = mod    
+        return mod
+
+def getETreeBuilder(ElementTreeImplementation, fullTree=False):
+    ElementTree = ElementTreeImplementation
+    class Element(_base.Node):
+        def __init__(self, name, namespace=None):
+            self._name = name
+            self._namespace = namespace
+            self._element = ElementTree.Element(self._getETreeTag(name,
+                                                                  namespace))
+            if namespace is None:
+                self.nameTuple = namespaces["html"], self._name
+            else:
+                self.nameTuple = self._namespace, self._name
+            self.parent = None
+            self._childNodes = []
+            self._flags = []
+
+        def _getETreeTag(self, name, namespace):
+            if namespace is None:
+                etree_tag = name
+            else:
+                etree_tag = "{%s}%s"%(namespace, name)
+            return etree_tag
+    
+        def _setName(self, name):
+            self._name = name
+            self._element.tag = self._getETreeTag(self._name, self._namespace)
+        
+        def _getName(self):
+            return self._name
+        
+        name = property(_getName, _setName)
+
+        def _setNamespace(self, namespace):
+            self._namespace = namespace
+            self._element.tag = self._getETreeTag(self._name, self._namespace)
+
+        def _getNamespace(self):
+            return self._namespace
+
+        namespace = property(_getNamespace, _setNamespace)
+    
+        def _getAttributes(self):
+            return self._element.attrib
+    
+        def _setAttributes(self, attributes):
+            #Delete existing attributes first
+            #XXX - there may be a better way to do this...
+            for key in self._element.attrib.keys():
+                del self._element.attrib[key]
+            for key, value in attributes.iteritems():
+                if isinstance(key, tuple):
+                    name = "{%s}%s"%(key[2], key[1])
+                else:
+                    name = key
+                self._element.set(name, value)
+    
+        attributes = property(_getAttributes, _setAttributes)
+    
+        def _getChildNodes(self):
+            return self._childNodes    
+        def _setChildNodes(self, value):
+            del self._element[:]
+            self._childNodes = []
+            for element in value:
+                self.insertChild(element)
+    
+        childNodes = property(_getChildNodes, _setChildNodes)
+    
+        def hasContent(self):
+            """Return true if the node has children or text"""
+            return bool(self._element.text or len(self._element))
+    
+        def appendChild(self, node):
+            self._childNodes.append(node)
+            self._element.append(node._element)
+            node.parent = self
+    
+        def insertBefore(self, node, refNode):
+            index = list(self._element).index(refNode._element)
+            self._element.insert(index, node._element)
+            node.parent = self
+    
+        def removeChild(self, node):
+            self._element.remove(node._element)
+            node.parent=None
+    
+        def insertText(self, data, insertBefore=None):
+            if not(len(self._element)):
+                if not self._element.text:
+                    self._element.text = ""
+                self._element.text += data
+            elif insertBefore is None:
+                #Insert the text as the tail of the last child element
+                if not self._element[-1].tail:
+                    self._element[-1].tail = ""
+                self._element[-1].tail += data
+            else:
+                #Insert the text before the specified node
+                children = list(self._element)
+                index = children.index(insertBefore._element)
+                if index > 0:
+                    if not self._element[index-1].tail:
+                        self._element[index-1].tail = ""
+                    self._element[index-1].tail += data
+                else:
+                    if not self._element.text:
+                        self._element.text = ""
+                    self._element.text += data
+    
+        def cloneNode(self):
+            element = type(self)(self.name, self.namespace)
+            for name, value in self.attributes.iteritems():
+                element.attributes[name] = value
+            return element
+    
+        def reparentChildren(self, newParent):
+            """Move this element's leading text and children to newParent."""
+            if newParent.childNodes:
+                last = newParent.childNodes[-1]._element
+                last.tail = (last.tail or "") + (self._element.text or "")
+            else:
+                newParent._element.text = ((newParent._element.text or "") +
+                                           (self._element.text or ""))
+            self._element.text = ""
+            _base.Node.reparentChildren(self, newParent)
+    
+    class Comment(Element):
+        def __init__(self, data):
+            #Element.__init__ is not called here: wrap an ElementTree.Comment
+            #and set the wrapper's bookkeeping attributes directly
+            self._element = ElementTree.Comment(data)
+            self.parent = None
+            self._childNodes = []
+            self._flags = []
+            
+        def _getData(self):
+            return self._element.text
+    
+        def _setData(self, value):
+            self._element.text = value
+    
+        data = property(_getData, _setData)
+    
+    class DocumentType(Element):
+        def __init__(self, name, publicId, systemId):
+            Element.__init__(self, "<!DOCTYPE>") 
+            self._element.text = name
+            self.publicId = publicId
+            self.systemId = systemId
+
+        def _getPublicId(self):
+            return self._element.get(u"publicId", "")
+
+        def _setPublicId(self, value):
+            if value is not None:
+                self._element.set(u"publicId", value)
+
+        publicId = property(_getPublicId, _setPublicId)
+    
+        def _getSystemId(self):
+            return self._element.get(u"systemId", "")
+
+        def _setSystemId(self, value):
+            if value is not None:
+                self._element.set(u"systemId", value)
+
+        systemId = property(_getSystemId, _setSystemId)
+    
+    class Document(Element):
+        def __init__(self):
+            Element.__init__(self, "<DOCUMENT_ROOT>") 
+    
+    class DocumentFragment(Element):
+        def __init__(self):
+            Element.__init__(self, "<DOCUMENT_FRAGMENT>")
+    
+    def testSerializer(element):
+        rv = []
+        finalText = None
+        def serializeElement(element, indent=0):
+            if not(hasattr(element, "tag")):
+                element = element.getroot()
+            if element.tag == "<!DOCTYPE>":
+                if element.get("publicId") or element.get("systemId"):
+                    publicId = element.get("publicId") or ""
+                    systemId = element.get("systemId") or ""
+                    rv.append( """<!DOCTYPE %s "%s" "%s">"""%(
+                            element.text, publicId, systemId))
+                else:     
+                    rv.append("<!DOCTYPE %s>"%(element.text,))
+            elif element.tag == "<DOCUMENT_ROOT>":
+                rv.append("#document")
+                if element.text:
+                    rv.append("|%s\"%s\""%(' '*(indent+2), element.text))
+                if element.tail:
+                    finalText = element.tail
+            elif element.tag == ElementTree.Comment:
+                rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
+            else:
+                assert type(element.tag) in types.StringTypes, "Expected unicode, got %s"%type(element.tag)
+                nsmatch = tag_regexp.match(element.tag)
+
+                if nsmatch is None:
+                    name = element.tag
+                else:
+                    ns, name = nsmatch.groups()
+                    prefix = constants.prefixes[ns]
+                    name = "%s %s"%(prefix, name)
+                rv.append("|%s<%s>"%(' '*indent, name))
+
+                if hasattr(element, "attrib"):
+                    attributes = []
+                    for name, value in element.attrib.iteritems():
+                        nsmatch = tag_regexp.match(name)
+                        if nsmatch is not None:
+                            ns, name = nsmatch.groups()
+                            prefix = constants.prefixes[ns]
+                            attr_string = "%s %s"%(prefix, name)
+                        else:
+                            attr_string = name
+                        attributes.append((attr_string, value))
+
+                    for name, value in sorted(attributes):
+                        rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
+                if element.text:
+                    rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
+            indent += 2
+            for child in element:
+                serializeElement(child, indent)
+            if element.tail:
+                rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
+        serializeElement(element, 0)
+    
+        if finalText is not None:
+            rv.append("|%s\"%s\""%(' '*2, finalText))
+    
+        return "\n".join(rv)
+    
+    def tostring(element):
+        """Serialize an element and its child nodes to a string"""
+        rv = []
+        finalText = None
+        filter = ihatexml.InfosetFilter()
+        def serializeElement(element):
+            #An ElementTree wrapper has no "tag" attribute; unwrap to its root
+            if not hasattr(element, "tag"):
+                element = element.getroot()
+            if element.tag == "<!DOCTYPE>":
+                if element.get("publicId") or element.get("systemId"):
+                    publicId = element.get("publicId") or ""
+                    systemId = element.get("systemId") or ""
+                    rv.append( """<!DOCTYPE %s PUBLIC "%s" "%s">"""%(
+                            element.text, publicId, systemId))
+                else:     
+                    rv.append("<!DOCTYPE %s>"%(element.text,))
+            elif element.tag == "<DOCUMENT_ROOT>":
+                if element.text:
+                    rv.append(element.text)
+                if element.tail:
+                    finalText = element.tail
+    
+                for child in element:
+                    serializeElement(child)
+    
+            elif type(element.tag) == type(ElementTree.Comment):
+                rv.append("<!--%s-->"%(element.text,))
+            else:
+                #This is assumed to be an ordinary element
+                if not element.attrib:
+                    rv.append("<%s>"%(filter.fromXmlName(element.tag),))
+                else:
+                    attr = " ".join(["%s=\"%s\""%(
+                                filter.fromXmlName(name), value) 
+                                     for name, value in element.attrib.iteritems()])
+                    rv.append("<%s %s>"%(element.tag, attr))
+                if element.text:
+                    rv.append(element.text)
+    
+                for child in element:
+                    serializeElement(child)
+    
+                rv.append("</%s>"%(element.tag,))
+    
+            if element.tail:
+                rv.append(element.tail)
+    
+        serializeElement(element)
+    
+        if finalText is not None:
+            rv.append("%s\""%(' '*2, finalText))
+    
+        return "".join(rv)
+    
+    class TreeBuilder(_base.TreeBuilder):
+        documentClass = Document
+        doctypeClass = DocumentType
+        elementClass = Element
+        commentClass = Comment
+        fragmentClass = DocumentFragment
+    
+        def testSerializer(self, element):
+            return testSerializer(element)
+    
+        def getDocument(self):
+            if fullTree:
+                return self.document._element
+            else:
+                if self.defaultNamespace is not None:
+                    return self.document._element.find(
+                        "{%s}html"%self.defaultNamespace)
+                else:
+                    return self.document._element.find("html")
+        
+        def getFragment(self):
+            return _base.TreeBuilder.getFragment(self)._element
+        
+    return locals()
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
new file mode 100644
index 00000000..eee1e3b2
--- /dev/null
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -0,0 +1,336 @@
+import warnings
+import re
+
+import _base
+from html5lib.constants import DataLossWarning
+import html5lib.constants as constants
+import etree as etree_builders
+from html5lib import ihatexml
+
+try:
+    import lxml.etree as etree
+except ImportError:
+    pass
+
+fullTree = True
+tag_regexp = re.compile("{([^}]*)}(.*)")
+
+"""Module for supporting the lxml.etree library. The idea here is to use as much
+of the native library as possible, without using fragile hacks like custom element
+names that break between releases. The downside of this is that we cannot represent
+all possible trees; specifically the following are known to cause problems:
+
+Text or comments as siblings of the root element
+Doctypes with no name
+
+When any of these things occur, we emit a DataLossWarning
+"""
+
+class DocumentType(object):
+    def __init__(self, name, publicId, systemId):
+        self.name = name         
+        self.publicId = publicId
+        self.systemId = systemId
+
+class Document(object):
+    def __init__(self):
+        self._elementTree = None
+        self._childNodes = []
+
+    def appendChild(self, element):
+        self._elementTree.getroot().addnext(element._element)
+
+    def _getChildNodes(self):
+        return self._childNodes
+    
+    childNodes = property(_getChildNodes)
+
+def testSerializer(element):
+    rv = []
+    finalText = None
+    filter = ihatexml.InfosetFilter()
+    def serializeElement(element, indent=0):
+        if not hasattr(element, "tag"):
+            if  hasattr(element, "getroot"):
+                #Full tree case
+                rv.append("#document")
+                if element.docinfo.internalDTD:
+                    if not (element.docinfo.public_id or 
+                            element.docinfo.system_url):
+                        dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
+                    else:
+                        dtd_str = """<!DOCTYPE %s "%s" "%s">"""%(
+                            element.docinfo.root_name, 
+                            element.docinfo.public_id,
+                            element.docinfo.system_url)
+                    rv.append("|%s%s"%(' '*(indent+2), dtd_str))
+                next_element = element.getroot()
+                while next_element.getprevious() is not None:
+                    next_element = next_element.getprevious()
+                while next_element is not None:
+                    serializeElement(next_element, indent+2)
+                    next_element = next_element.getnext()
+            elif isinstance(element, basestring):
+                #Text in a fragment
+                rv.append("|%s\"%s\""%(' '*indent, element))
+            else:
+                #Fragment case
+                rv.append("#document-fragment")
+                for next_element in element:
+                    serializeElement(next_element, indent+2)
+        elif type(element.tag) == type(etree.Comment):
+            rv.append("|%s<!-- %s -->"%(' '*indent, element.text))
+        else:
+            nsmatch = etree_builders.tag_regexp.match(element.tag)
+            if nsmatch is not None:
+                ns = nsmatch.group(1)
+                tag = nsmatch.group(2)
+                prefix = constants.prefixes[ns]
+                rv.append("|%s<%s %s>"%(' '*indent, prefix,
+                                        filter.fromXmlName(tag)))
+            else:
+                rv.append("|%s<%s>"%(' '*indent,
+                                     filter.fromXmlName(element.tag)))
+
+            if hasattr(element, "attrib"):
+                attributes = []
+                for name, value in element.attrib.iteritems():
+                    nsmatch = tag_regexp.match(name)
+                    if nsmatch is not None:
+                        ns, name = nsmatch.groups()
+                        name = filter.fromXmlName(name)
+                        prefix = constants.prefixes[ns]
+                        attr_string = "%s %s"%(prefix, name)
+                    else:
+                        attr_string = filter.fromXmlName(name)
+                    attributes.append((attr_string, value))
+
+                for name, value in sorted(attributes):
+                    rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
+
+            if element.text:
+                rv.append("|%s\"%s\"" %(' '*(indent+2), element.text))
+            indent += 2
+            for child in element.getchildren():
+                serializeElement(child, indent)
+        if hasattr(element, "tail") and element.tail:
+            rv.append("|%s\"%s\"" %(' '*(indent-2), element.tail))
+    serializeElement(element, 0)
+
+    if finalText is not None:
+        rv.append("|%s\"%s\""%(' '*2, finalText))
+
+    return "\n".join(rv)
+
+def tostring(element):
+    """Serialize an element and its child nodes to a string"""
+    rv = []
+    finalText = None
+    def serializeElement(element):
+        if not hasattr(element, "tag"):
+            if element.docinfo.internalDTD:
+                if element.docinfo.doctype:
+                    dtd_str = element.docinfo.doctype
+                else:
+                    dtd_str = "<!DOCTYPE %s>"%element.docinfo.root_name
+                rv.append(dtd_str)
+            serializeElement(element.getroot())
+            
+        elif type(element.tag) == type(etree.Comment):
+            rv.append("<!--%s-->"%(element.text,))
+        
+        else:
+            #This is assumed to be an ordinary element
+            if not element.attrib:
+                rv.append("<%s>"%(element.tag,))
+            else:
+                attr = " ".join(["%s=\"%s\""%(name, value) 
+                                 for name, value in element.attrib.iteritems()])
+                rv.append("<%s %s>"%(element.tag, attr))
+            if element.text:
+                rv.append(element.text)
+
+            for child in element.getchildren():
+                serializeElement(child)
+
+            rv.append("</%s>"%(element.tag,))
+
+        if hasattr(element, "tail") and element.tail:
+            rv.append(element.tail)
+
+    serializeElement(element)
+
+    if finalText is not None:
+        rv.append("%s\""%(' '*2, finalText))
+
+    return "".join(rv)
+        
+
+class TreeBuilder(_base.TreeBuilder):
+    documentClass = Document
+    doctypeClass = DocumentType
+    elementClass = None
+    commentClass = None
+    fragmentClass = Document    
+
+    def __init__(self, namespaceHTMLElements, fullTree = False):
+        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
+        filter = self.filter = ihatexml.InfosetFilter()
+        self.namespaceHTMLElements = namespaceHTMLElements
+
+        class Attributes(dict):
+            def __init__(self, element, value={}):
+                self._element = element
+                dict.__init__(self, value)
+                for key, value in self.iteritems():
+                    if isinstance(key, tuple):
+                        name = "{%s}%s"%(key[2], filter.coerceAttribute(key[1]))
+                    else:
+                        name = filter.coerceAttribute(key)
+                    self._element._element.attrib[name] = value
+
+            def __setitem__(self, key, value):
+                dict.__setitem__(self, key, value)
+                if isinstance(key, tuple):
+                    name = "{%s}%s"%(key[2], filter.coerceAttribute(key[1]))
+                else:
+                    name = filter.coerceAttribute(key)
+                self._element._element.attrib[name] = value
+
+        class Element(builder.Element):
+            def __init__(self, name, namespace):
+                name = filter.coerceElement(name)
+                builder.Element.__init__(self, name, namespace=namespace)
+                self._attributes = Attributes(self)
+
+            def _setName(self, name):
+                self._name = filter.coerceElement(name)
+                self._element.tag = self._getETreeTag(
+                    self._name, self._namespace)
+        
+            def _getName(self):
+                return filter.fromXmlName(self._name)
+        
+            name = property(_getName, _setName)
+
+            def _getAttributes(self):
+                return self._attributes
+
+            def _setAttributes(self, attributes):
+                self._attributes = Attributes(self, attributes)
+    
+            attributes = property(_getAttributes, _setAttributes)
+
+            def insertText(self, data, insertBefore=None):
+                data = filter.coerceCharacters(data)
+                builder.Element.insertText(self, data, insertBefore)
+
+            def appendChild(self, child):
+                builder.Element.appendChild(self, child)
+
+        class Comment(builder.Comment):
+            def __init__(self, data):
+                data = filter.coerceComment(data)
+                builder.Comment.__init__(self, data)
+
+            def _setData(self, data):
+                data = filter.coerceComment(data)
+                self._element.text = data
+
+            def _getData(self):
+                return self._element.text
+
+            data = property(_getData, _setData)
+
+        self.elementClass = Element
+        #Use the local Comment subclass so comment data goes through the filter
+        self.commentClass = Comment
+        #self.fragmentClass = builder.DocumentFragment
+        _base.TreeBuilder.__init__(self, namespaceHTMLElements)
+    
+    def reset(self):
+        _base.TreeBuilder.reset(self)
+        self.insertComment = self.insertCommentInitial
+        self.initial_comments = []
+        self.doctype = None
+
+    def testSerializer(self, element):
+        return testSerializer(element)
+
+    def getDocument(self):
+        if fullTree:
+            return self.document._elementTree
+        else:
+            return self.document._elementTree.getroot()
+    
+    def getFragment(self):
+        fragment = []
+        element = self.openElements[0]._element
+        if element.text:
+            fragment.append(element.text)
+        fragment.extend(element.getchildren())
+        if element.tail:
+            fragment.append(element.tail)
+        return fragment
+
+    def insertDoctype(self, token):
+        name = token["name"]
+        publicId = token["publicId"]
+        systemId = token["systemId"]
+
+        if not name or ihatexml.nonXmlNameBMPRegexp.search(name) or name[0] == '"':
+            warnings.warn("lxml cannot represent null or non-xml doctype", DataLossWarning)
+
+        doctype = self.doctypeClass(name, publicId, systemId)
+        self.doctype = doctype
+    
+    def insertCommentInitial(self, data, parent=None):
+        self.initial_comments.append(data)
+    
+    def insertRoot(self, token):
+        """Create the document root"""
+        #Because of the way libxml2 works, it doesn't seem to be possible to
+        #alter information like the doctype after the tree has been parsed. 
+        #Therefore we need to use the built-in parser to create our initial
+        #tree, after which we can add elements like normal
+        docStr = ""
+        if self.doctype and self.doctype.name and not self.doctype.name.startswith('"'):
+            docStr += "<!DOCTYPE %s"%self.doctype.name
+            if (self.doctype.publicId is not None or 
+                self.doctype.systemId is not None):
+                docStr += ' PUBLIC "%s" "%s"'%(self.doctype.publicId or "",
+                                               self.doctype.systemId or "")
+            docStr += ">"
+        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
+        
+        try:
+            root = etree.fromstring(docStr)
+        except etree.XMLSyntaxError:
+            print docStr
+            raise
+        
+        #Append the initial comments:
+        for comment_token in self.initial_comments:
+            root.addprevious(etree.Comment(comment_token["data"]))
+        
+        #Create the root document and add the ElementTree to it
+        self.document = self.documentClass()
+        self.document._elementTree = root.getroottree()
+        
+        # Give the root element the right name
+        name = token["name"]
+        namespace = token.get("namespace", self.defaultNamespace)
+        if namespace is None:
+            etree_tag = name
+        else:
+            etree_tag = "{%s}%s"%(namespace, name)
+        root.tag = etree_tag
+        
+        #Add the root element to the internal child/open data structures
+        root_element = self.elementClass(name, namespace)
+        root_element._element = root
+        self.document._childNodes.append(root_element)
+        self.openElements.append(root_element)
+    
+        #Reset to the default insert comment function
+        self.insertComment = super(TreeBuilder, self).insertComment
diff --git a/html5lib/treebuilders/simpletree.py b/html5lib/treebuilders/simpletree.py
new file mode 100755
index 00000000..67fe7583
--- /dev/null
+++ b/html5lib/treebuilders/simpletree.py
@@ -0,0 +1,256 @@
+import _base
+from html5lib.constants import voidElements, namespaces, prefixes
+from xml.sax.saxutils import escape
+
+# Really crappy basic implementation of a DOM-core like thing
+class Node(_base.Node):
+    type = -1
+    def __init__(self, name):
+        self.name = name
+        self.parent = None
+        self.value = None
+        self.childNodes = []
+        self._flags = []
+
+    def __iter__(self):
+        for node in self.childNodes:
+            yield node
+            for item in node:
+                yield item
+
+    def __unicode__(self):
+        return self.name
+
+    def toxml(self):
+        raise NotImplementedError
+
+    def printTree(self, indent=0):
+        tree = '\n|%s%s' % (' '* indent, unicode(self))
+        for child in self.childNodes:
+            tree += child.printTree(indent + 2)
+        return tree
+
+    def appendChild(self, node):
+        assert isinstance(node, Node)
+        if (isinstance(node, TextNode) and self.childNodes and
+          isinstance(self.childNodes[-1], TextNode)):
+            self.childNodes[-1].value += node.value
+        else:
+            self.childNodes.append(node)
+        node.parent = self
+
+    def insertText(self, data, insertBefore=None):
+        assert isinstance(data, unicode), "data %s is of type %s expected unicode"%(repr(data), type(data))
+        if insertBefore is None:
+            self.appendChild(TextNode(data))
+        else:
+            self.insertBefore(TextNode(data), insertBefore)
+
+    def insertBefore(self, node, refNode):
+        index = self.childNodes.index(refNode)
+        if (isinstance(node, TextNode) and index > 0 and
+          isinstance(self.childNodes[index - 1], TextNode)):
+            self.childNodes[index - 1].value += node.value
+        else:
+            self.childNodes.insert(index, node)
+        node.parent = self
+
+    def removeChild(self, node):
+        """Detach node from this node's list of children.
+
+        The error raised by childNodes.remove() for a non-child propagates.
+        """
+        self.childNodes.remove(node)
+        node.parent = None
+
+    def cloneNode(self):
+        raise NotImplementedError
+
+    def hasContent(self):
+        """Return true if the node has children or text"""
+        return bool(self.childNodes)
+
+    def getNameTuple(self):
+        if self.namespace == None:
+            return namespaces["html"], self.name
+        else:
+            return self.namespace, self.name
+
+    nameTuple = property(getNameTuple)
+
+class Document(Node):
+    type = 1
+    def __init__(self):
+        Node.__init__(self, None)
+
+    def __str__(self):
+        return "#document"
+
+    def __unicode__(self):
+        return str(self)
+
+    def appendChild(self, child):
+        Node.appendChild(self, child)
+
+    def toxml(self, encoding="utf-8"):
+        """Return the children's toxml() output, joined and encoded."""
+        # "utf-8" matches the default encoding used by hilite()
+        result = "".join([child.toxml() for child in self.childNodes])
+        return result.encode(encoding)
+
+    def hilite(self, encoding="utf-8"):
+        result = "<pre>"
+        for child in self.childNodes:
+            result += child.hilite()
+        return result.encode(encoding) + "</pre>"
+    
+    def printTree(self):
+        tree = unicode(self)
+        for child in self.childNodes:
+            tree += child.printTree(2)
+        return tree
+
+    def cloneNode(self):
+        return Document()
+
+class DocumentFragment(Document):
+    type = 2
+    def __str__(self):
+        return "#document-fragment"
+
+    def __unicode__(self):
+        return str(self)
+
+    def cloneNode(self):
+        return DocumentFragment()
+
+class DocumentType(Node):
+    type = 3
+    def __init__(self, name, publicId, systemId):
+        Node.__init__(self, name)
+        self.publicId = publicId
+        self.systemId = systemId
+
+    def __unicode__(self):
+        if self.publicId or self.systemId:
+            publicId = self.publicId or ""
+            systemId = self.systemId or ""
+            return """<!DOCTYPE %s "%s" "%s">"""%(
+                self.name, publicId, systemId)
+                            
+        else:
+            return u"<!DOCTYPE %s>" % self.name
+    
+
+    toxml = __unicode__
+    
+    def hilite(self):
+        return '<code class="markup doctype">&lt;!DOCTYPE %s></code>' % self.name
+
+    def cloneNode(self):
+        return DocumentType(self.name, self.publicId, self.systemId)
+
+class TextNode(Node):
+    type = 4
+    def __init__(self, value):
+        Node.__init__(self, None)
+        self.value = value
+
+    def __unicode__(self):
+        return u"\"%s\"" % self.value
+
+    def toxml(self):
+        return escape(self.value)
+    
+    hilite = toxml
+
+    def cloneNode(self):
+        return TextNode(self.value)
+
+class Element(Node):
+    type = 5
+    def __init__(self, name, namespace=None):
+        Node.__init__(self, name)
+        self.namespace = namespace
+        self.attributes = {}
+
+    def __unicode__(self):
+        if self.namespace == None:
+            return u"<%s>" % self.name
+        else:
+            return u"<%s %s>"%(prefixes[self.namespace], self.name)
+
+    def toxml(self):
+        result = '<' + self.name
+        if self.attributes:
+            for name,value in self.attributes.iteritems():
+                result += u' %s="%s"' % (name, escape(value,{'"':'&quot;'}))
+        if self.childNodes:
+            result += '>'
+            for child in self.childNodes:
+                result += child.toxml()
+            result += u'</%s>' % self.name
+        else:
+            result += u'/>'
+        return result
+    
+    def hilite(self):
+        result = '&lt;<code class="markup element-name">%s</code>' % self.name
+        if self.attributes:
+            for name, value in self.attributes.iteritems():
+                result += ' <code class="markup attribute-name">%s</code>=<code class="markup attribute-value">"%s"</code>' % (name, escape(value, {'"':'&quot;'}))
+        if self.childNodes:
+            result += ">"
+            for child in self.childNodes:
+                result += child.hilite()
+        elif self.name in voidElements:
+            return result + ">"
+        return result + '&lt;/<code class="markup element-name">%s</code>>' % self.name
+
+    def printTree(self, indent):
+        tree = '\n|%s%s' % (' '*indent, unicode(self))
+        indent += 2
+        if self.attributes:
+            for name, value in sorted(self.attributes.iteritems()):
+                if isinstance(name, tuple):
+                    name = "%s %s"%(name[0], name[1])
+                tree += '\n|%s%s="%s"' % (' ' * indent, name, value)
+        for child in self.childNodes:
+            tree += child.printTree(indent)
+        return tree
+
+    def cloneNode(self):
+        newNode = Element(self.name)
+        if hasattr(self, 'namespace'):
+            newNode.namespace = self.namespace
+        for attr, value in self.attributes.iteritems():
+            newNode.attributes[attr] = value
+        return newNode
+
+class CommentNode(Node):
+    type = 6
+    def __init__(self, data):
+        Node.__init__(self, None)
+        self.data = data
+
+    def __unicode__(self):
+        return "<!-- %s -->" % self.data
+    
+    def toxml(self):
+        return "<!--%s-->" % self.data
+
+    def hilite(self):
+        return '<code class="markup comment">&lt;!--%s--></code>' % escape(self.data)
+
+    def cloneNode(self):
+        return CommentNode(self.data)
+
+class TreeBuilder(_base.TreeBuilder):
+    documentClass = Document
+    doctypeClass = DocumentType
+    elementClass = Element
+    commentClass = CommentNode
+    fragmentClass = DocumentFragment
+    
+    def testSerializer(self, node):
+        return node.printTree()
diff --git a/html5lib/treebuilders/soup.py b/html5lib/treebuilders/soup.py
new file mode 100644
index 00000000..9bc5ff0e
--- /dev/null
+++ b/html5lib/treebuilders/soup.py
@@ -0,0 +1,236 @@
+import warnings
+
+warnings.warn("BeautifulSoup 3.x (as of 3.1) is not fully compatible with html5lib and support will be removed in the future", DeprecationWarning)
+
+from BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment, Declaration
+
+import _base
+from html5lib.constants import namespaces, DataLossWarning
+
+class AttrList(object):  # Dict-like view over the attributes of a wrapped BeautifulSoup element.
+    def __init__(self, element):
+        self.element = element
+        self.attrs = dict(self.element.attrs)  # snapshot taken at construction time
+    def __iter__(self):
+        return iter(self.attrs.items())  # yields (name, value) pairs, not bare names
+    def __setitem__(self, name, value):
+        self.attrs[name] = value  # keep the snapshot in sync so later reads see the write
+        self.element[name] = value
+    def items(self):
+        return self.attrs.items()
+    def keys(self):
+        return self.attrs.keys()
+    def __getitem__(self, name):
+        return self.attrs[name]
+    def __contains__(self, name):
+        return name in self.attrs  # direct dict membership instead of scanning a key list
+    def __eq__(self, other):  # equal when both sides hold the same keys with equal values
+        if len(self.keys()) != len(other.keys()):
+            return False
+        for item in self.keys():
+            if item not in other:
+                return False
+            if self[item] != other[item]:
+                return False
+        return True
+
+class Element(_base.Node):  # html5lib tree node wrapping a BeautifulSoup object (self.element).
+    def __init__(self, element, soup, namespace):
+        _base.Node.__init__(self, element.name)
+        self.element = element  # the wrapped BeautifulSoup object
+        self.soup = soup  # passed on to every Tag / wrapper created from this node
+        self.namespace = namespace
+
+    def _nodeIndex(self, node, refNode):
+        # Finds a node by identity rather than equality
+        for index in range(len(self.element.contents)):
+            if id(self.element.contents[index]) == id(refNode.element):
+                return index
+        return None  # refNode is not among this element's contents
+
+    def appendChild(self, node):
+        if (node.element.__class__ == NavigableString and self.element.contents
+            and self.element.contents[-1].__class__ == NavigableString):
+            # Concatenate new text onto old text node
+            # (TODO: This has O(n^2) performance, for input like "a</a>a</a>a</a>...")
+            newStr = NavigableString(self.element.contents[-1]+node.element)
+
+            # Remove the old text node
+            # (Can't simply use .extract() by itself, because it fails if
+            # an equal text node exists within the parent node)
+            oldElement = self.element.contents[-1]
+            del self.element.contents[-1]
+            oldElement.parent = None
+            oldElement.extract()
+
+            self.element.insert(len(self.element.contents), newStr)
+        else:
+            self.element.insert(len(self.element.contents), node.element)
+            node.parent = self
+
+    def getAttributes(self):
+        return AttrList(self.element)  # a fresh view is built on every access
+
+    def setAttributes(self, attributes):
+        if attributes:
+            for name, value in attributes.items():
+                self.element[name] =  value
+
+    attributes = property(getAttributes, setAttributes)
+    
+    def insertText(self, data, insertBefore=None):
+        text = TextNode(NavigableString(data), self.soup)
+        if insertBefore:
+            self.insertBefore(text, insertBefore)
+        else:
+            self.appendChild(text)
+
+    def insertBefore(self, node, refNode):
+        index = self._nodeIndex(node, refNode)
+        if (node.element.__class__ == NavigableString and index  # index 0 must not wrap round to contents[-1]
+            and self.element.contents[index-1].__class__ == NavigableString):
+            # (See comments in appendChild)
+            newStr = NavigableString(self.element.contents[index-1]+node.element)
+            oldNode = self.element.contents[index-1]
+            del self.element.contents[index-1]
+            oldNode.parent = None
+            oldNode.extract()
+
+            self.element.insert(index-1, newStr)
+        else:
+            self.element.insert(index, node.element)
+            node.parent = self
+
+    def removeChild(self, node):
+        index = self._nodeIndex(node.parent, node)  # looked up in this element's contents
+        del node.parent.element.contents[index]
+        node.element.parent = None
+        node.element.extract()
+        node.parent = None
+
+    def reparentChildren(self, newParent):
+        while self.element.contents:  # each pass detaches the current first child
+            child = self.element.contents[0]
+            child.extract()
+            if isinstance(child, Tag):
+                newParent.appendChild(Element(child, self.soup, namespaces["html"]))
+            else:
+                newParent.appendChild(TextNode(child, self.soup))
+
+    def cloneNode(self):
+        node = Element(Tag(self.soup, self.element.name), self.soup, self.namespace)
+        for key,value in self.attributes:  # AttrList iterates as (name, value) pairs
+            node.attributes[key] = value
+        return node
+
+    def hasContent(self):
+        return self.element.contents  # returns the contents list itself; truthy when non-empty
+
+    def getNameTuple(self):
+        if self.namespace is None:  # no namespace recorded: report the HTML namespace
+            return namespaces["html"], self.name
+        else:
+            return self.namespace, self.name
+
+    nameTuple = property(getNameTuple)
+
+class TextNode(Element):  # Wrapper for non-Tag BeautifulSoup objects (text, comments); has no name.
+    def __init__(self, element, soup):
+        _base.Node.__init__(self, None)  # bypasses Element.__init__, so no namespace attribute is set
+        self.element = element
+        self.soup = soup
+    
+    def cloneNode(self):
+        raise NotImplementedError  # cloning is not supported for these wrappers
+
+class TreeBuilder(_base.TreeBuilder):  # Tree builder that produces a BeautifulSoup 3 document.
+    def __init__(self, namespaceHTMLElements):
+        if namespaceHTMLElements:
+            warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
+        _base.TreeBuilder.__init__(self, namespaceHTMLElements)
+        
+    def documentClass(self):  # factory method: also (re)creates self.soup as an empty document
+        self.soup = BeautifulSoup("")
+        return Element(self.soup, self.soup, None)
+    
+    def insertDoctype(self, token):  # inserts a Declaration at position 0 of the soup
+        name = token["name"]
+        publicId = token["publicId"]
+        systemId = token["systemId"]
+
+        if publicId:
+            self.soup.insert(0, Declaration("DOCTYPE %s PUBLIC \"%s\" \"%s\""%(name, publicId, systemId or "")))
+        elif systemId:
+            self.soup.insert(0, Declaration("DOCTYPE %s SYSTEM \"%s\""%
+                                            (name, systemId)))
+        else:
+            self.soup.insert(0, Declaration("DOCTYPE %s"%name))
+    
+    def elementClass(self, name, namespace):  # warns when a namespace is given, then wraps a new Tag
+        if namespace is not None:
+            warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
+        return Element(Tag(self.soup, name), self.soup, namespace)
+        
+    def commentClass(self, data):
+        return TextNode(Comment(data), self.soup)  # comments are wrapped as TextNode, not Element
+    
+    def fragmentClass(self):  # like documentClass, but the soup is renamed to mark it as a fragment
+        self.soup = BeautifulSoup("")
+        self.soup.name = "[document_fragment]"
+        return Element(self.soup, self.soup, None) 
+
+    def appendChild(self, node):
+        self.soup.insert(len(self.soup.contents), node.element)  # append at the end of the soup
+
+    def testSerializer(self, element):
+        return testSerializer(element)  # module-level function of the same name
+
+    def getDocument(self):
+        return self.soup  # the raw BeautifulSoup object, not the Element wrapper
+    
+    def getFragment(self):
+        return _base.TreeBuilder.getFragment(self).element  # unwrap to the BeautifulSoup object
+    
+def testSerializer(element):  # Dump a BeautifulSoup tree as "|"-prefixed lines, one node per line.
+    import re
+    rv = []  # output lines, joined with newlines at the end
+    def serializeElement(element, indent=0):
+        if isinstance(element, Declaration):
+            doctype_regexp = r'DOCTYPE\s+(?P<name>[^\s]*)( PUBLIC "(?P<publicId>.*)" "(?P<systemId1>.*)"| SYSTEM "(?P<systemId2>.*)")?'
+            m = re.compile(doctype_regexp).match(element.string)
+            assert m is not None, "DOCTYPE did not match expected format"
+            name = m.group('name')
+            publicId = m.group('publicId')
+            if publicId is not None:
+                systemId = m.group('systemId1') or ""  # PUBLIC form: system id comes from the first group
+            else:
+                systemId = m.group('systemId2')  # SYSTEM form, or None when neither form matched
+
+            if publicId is not None or systemId is not None:
+                rv.append("""|%s<!DOCTYPE %s "%s" "%s">"""%
+                          (' '*indent, name, publicId or "", systemId or ""))
+            else:
+                rv.append("|%s<!DOCTYPE %s>"%(' '*indent, name))
+            
+        elif isinstance(element, BeautifulSoup):
+            if element.name == "[document_fragment]":
+                rv.append("#document-fragment")                
+            else:
+                rv.append("#document")
+
+        elif isinstance(element, Comment):  # tested before the generic unicode branch below
+            rv.append("|%s<!-- %s -->"%(' '*indent, element.string))
+        elif isinstance(element, unicode):
+            rv.append("|%s\"%s\"" %(' '*indent, element))
+        else:
+            rv.append("|%s<%s>"%(' '*indent, element.name))
+            if element.attrs:
+                for name, value in sorted(element.attrs):  # attrs is iterated as (name, value) pairs
+                    rv.append('|%s%s="%s"' % (' '*(indent+2), name, value))
+        indent += 2
+        if hasattr(element, "contents"):  # only objects exposing .contents are recursed into
+            for child in element.contents:
+                serializeElement(child, indent)
+    serializeElement(element, 0)
+
+    return "\n".join(rv)
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
new file mode 100644
index 00000000..3a606a8b
--- /dev/null
+++ b/html5lib/treewalkers/__init__.py
@@ -0,0 +1,52 @@
+"""A collection of modules for iterating through different kinds of
+tree, generating tokens identical to those produced by the tokenizer
+module.
+
+To create a tree walker for a new type of tree, you need to
+implement a tree walker object (called TreeWalker by convention) that
+implements a 'serialize' method taking a tree as sole argument and
+returning an iterator generating tokens.
+"""
+
+treeWalkerCache = {}
+
+def getTreeWalker(treeType, implementation=None, **kwargs):
+    """Get a TreeWalker class for various types of tree with built-in support
+
+    treeType - the name of the tree type required (case-insensitive). Supported
+               values are listed below; any other value returns None.
+
+               "simpletree" - a built-in DOM-ish tree type with support for some
+                              more pythonic idioms.
+                "dom" - The xml.dom.minidom DOM implementation
+                "pulldom" - The xml.dom.pulldom event stream
+                "etree" - A generic walker for tree implementations exposing an
+                          elementtree-like interface (known to work with
+                          ElementTree, cElementTree and lxml.etree).
+                "lxml" - Optimized walker for lxml.etree
+                "beautifulsoup" - Beautiful soup (if installed)
+                "genshi" - a Genshi stream
+
+    implementation - (Currently applies to the "etree" tree type only). A module
+                      implementing the tree type e.g. xml.etree.ElementTree or
+                      cElementTree."""
+
+    treeType = treeType.lower()
+    if treeType not in treeWalkerCache:  # walker modules are imported lazily, on first request
+        if treeType in ("dom", "pulldom", "simpletree"):
+            mod = __import__(treeType, globals())  # module name equals the tree type
+            treeWalkerCache[treeType] = mod.TreeWalker
+        elif treeType == "genshi":
+            import genshistream
+            treeWalkerCache[treeType] = genshistream.TreeWalker
+        elif treeType == "beautifulsoup":
+            import soup
+            treeWalkerCache[treeType] = soup.TreeWalker
+        elif treeType == "lxml":
+            import lxmletree
+            treeWalkerCache[treeType] = lxmletree.TreeWalker
+        elif treeType == "etree":
+            import etree
+            # XXX: NEVER cache here, caching is done in the etree submodule
+            return etree.getETreeModule(implementation, **kwargs).TreeWalker  # kwargs are forwarded as-is
+    return treeWalkerCache.get(treeType)  # None when treeType was not recognised
diff --git a/html5lib/treewalkers/_base.py b/html5lib/treewalkers/_base.py
new file mode 100644
index 00000000..5929ba05
--- /dev/null
+++ b/html5lib/treewalkers/_base.py
@@ -0,0 +1,176 @@
+import gettext
+_ = gettext.gettext
+
+from html5lib.constants import voidElements, spaceCharacters
+spaceCharacters = u"".join(spaceCharacters)
+
+class TreeWalker(object):  # Base class: token-dict factories shared by all tree walkers.
+    def __init__(self, tree):
+        self.tree = tree  # the tree (or event stream) that __iter__ will walk
+
+    def __iter__(self):
+        raise NotImplementedError  # subclasses yield token dicts here
+
+    def error(self, msg):
+        return {"type": "SerializeError", "data": msg}
+
+    def normalizeAttrs(self, attrs):  # returns a new dict; keys are (namespace, name) tuples
+        newattrs = {}
+        if attrs:
+            #TODO: treewalkers should always have attrs
+            for (namespace,name),value in attrs.iteritems():
+                namespace = unicode(namespace) if namespace else None  # falsy namespaces become None
+                name = unicode(name)
+                value = unicode(value)
+                newattrs[(namespace,name)] = value
+        return newattrs
+
+    def emptyTag(self, namespace, name, attrs, hasChildren=False):  # generator, unlike startTag/endTag
+        yield {"type": "EmptyTag", "name": unicode(name), 
+               "namespace":unicode(namespace),
+               "data": self.normalizeAttrs(attrs)}
+        if hasChildren:
+            yield self.error(_("Void element has children"))
+
+    def startTag(self, namespace, name, attrs):
+        return {"type": "StartTag", 
+                "name": unicode(name),
+                "namespace":unicode(namespace),
+                "data": self.normalizeAttrs(attrs)}
+
+    def endTag(self, namespace, name):
+        return {"type": "EndTag", 
+                "name": unicode(name),
+                "namespace":unicode(namespace),
+                "data": {}}
+
+    def text(self, data):  # generator: splits data into leading space / characters / trailing space tokens
+        data = unicode(data)
+        middle = data.lstrip(spaceCharacters)
+        left = data[:len(data)-len(middle)]  # the leading run that lstrip removed
+        if left:
+            yield {"type": "SpaceCharacters", "data": left}
+        data = middle
+        middle = data.rstrip(spaceCharacters)
+        right = data[len(middle):]  # the trailing run that rstrip removed
+        if middle:
+            yield {"type": "Characters", "data": middle}
+        if right:
+            yield {"type": "SpaceCharacters", "data": right}
+
+    def comment(self, data):
+        return {"type": "Comment", "data": unicode(data)}
+
+    def doctype(self, name, publicId=None, systemId=None, correct=True):
+        return {"type": "Doctype",
+                "name": name is not None and unicode(name) or u"",
+                "publicId": publicId,
+                "systemId": systemId,
+                "correct": correct}
+
+    def entity(self, name):
+        return {"type": "Entity", "name": unicode(name)}
+
+    def unknown(self, nodeType):
+        return self.error(_("Unknown node type: ") + unicode(nodeType))  # nodeType may be an int (dom walker)
+
+class RecursiveTreeWalker(TreeWalker):  # Walker base whose subclasses recurse via walkChildren().
+    def walkChildren(self, node):
+        raise NotImplementedError  # was the undefined name NodeImplementedError
+
+    def element(self, node, namespace, name, attrs, hasChildren):  # generator of tokens for one element
+        if name in voidElements:
+            for token in self.emptyTag(namespace, name, attrs, hasChildren):
+                yield token
+        else:
+            yield self.startTag(namespace, name, attrs)  # startTag requires the namespace argument
+            if hasChildren:
+                for token in self.walkChildren(node):
+                    yield token
+            yield self.endTag(namespace, name)  # endTag requires the namespace argument
+
+from xml.dom import Node
+
+DOCUMENT = Node.DOCUMENT_NODE  # node-type tags returned as the first item of getNodeDetails()
+DOCTYPE = Node.DOCUMENT_TYPE_NODE
+TEXT = Node.TEXT_NODE
+ELEMENT = Node.ELEMENT_NODE
+COMMENT = Node.COMMENT_NODE
+ENTITY = Node.ENTITY_NODE
+UNKNOWN = "<#UNKNOWN#>"  # a string sentinel, unlike the xml.dom constants above
+
+class NonRecursiveTreeWalker(TreeWalker):  # Iterative walker driven by four navigation hooks.
+    def getNodeDetails(self, node):  # hook: return a tuple (node type, *details)
+        raise NotImplementedError
+    
+    def getFirstChild(self, node):  # hook: first child of node, or None
+        raise NotImplementedError
+    
+    def getNextSibling(self, node):  # hook: next sibling of node, or None
+        raise NotImplementedError
+    
+    def getParentNode(self, node):  # hook: parent of node, or None
+        raise NotImplementedError
+
+    def __iter__(self):  # yields token dicts for self.tree in document order
+        currentNode = self.tree
+        while currentNode is not None:
+            details = self.getNodeDetails(currentNode)
+            type, details = details[0], details[1:]  # note: shadows the builtin `type` locally
+            hasChildren = False
+            endTag = None  # assigned here and below but never read in this method
+
+            if type == DOCTYPE:
+                yield self.doctype(*details)
+
+            elif type == TEXT:
+                for token in self.text(*details):
+                    yield token
+
+            elif type == ELEMENT:
+                namespace, name, attributes, hasChildren = details
+                if name in voidElements:
+                    for token in self.emptyTag(namespace, name, attributes, 
+                                               hasChildren):
+                        yield token
+                    hasChildren = False  # never descend into a void element
+                else:
+                    endTag = name
+                    yield self.startTag(namespace, name, attributes)
+
+            elif type == COMMENT:
+                yield self.comment(details[0])
+
+            elif type == ENTITY:
+                yield self.entity(details[0])
+
+            elif type == DOCUMENT:
+                hasChildren = True  # document nodes emit no token; only their children do
+
+            else:
+                yield self.unknown(details[0])
+            
+            if hasChildren:
+                firstChild = self.getFirstChild(currentNode)
+            else:
+                firstChild = None
+            
+            if firstChild is not None:
+                currentNode = firstChild  # descend
+            else:
+                while currentNode is not None:  # climb, closing elements, until a sibling is found
+                    details = self.getNodeDetails(currentNode)
+                    type, details = details[0], details[1:]
+                    if type == ELEMENT:
+                        namespace, name, attributes, hasChildren = details
+                        if name not in voidElements:
+                            yield self.endTag(namespace, name)
+                    if self.tree is currentNode:  # back at the root: the walk is finished
+                        currentNode = None
+                        break
+                    nextSibling = self.getNextSibling(currentNode)
+                    if nextSibling is not None:
+                        currentNode = nextSibling
+                        break
+                    else:
+                        currentNode = self.getParentNode(currentNode)
diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py
new file mode 100644
index 00000000..383b46cb
--- /dev/null
+++ b/html5lib/treewalkers/dom.py
@@ -0,0 +1,41 @@
+from xml.dom import Node
+
+import gettext
+_ = gettext.gettext
+
+import _base
+from html5lib.constants import voidElements
+
+class TreeWalker(_base.NonRecursiveTreeWalker):  # Walker over xml.dom-style nodes.
+    def getNodeDetails(self, node):  # maps node.nodeType to a (_base type, *details) tuple
+        if node.nodeType == Node.DOCUMENT_TYPE_NODE:
+            return _base.DOCTYPE, node.name, node.publicId, node.systemId
+
+        elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):  # CDATA is reported as text
+            return _base.TEXT, node.nodeValue
+
+        elif node.nodeType == Node.ELEMENT_NODE:
+            attrs = {}
+            for attr in node.attributes.keys():
+                attr = node.getAttributeNode(attr)  # rebinds attr from the key to the attribute node
+                attrs[(attr.namespaceURI,attr.localName)] = attr.value
+            return (_base.ELEMENT, node.namespaceURI, node.nodeName, 
+                    attrs, node.hasChildNodes())
+
+        elif node.nodeType == Node.COMMENT_NODE:
+            return _base.COMMENT, node.nodeValue
+
+        elif node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):  # fragments count as documents
+            return (_base.DOCUMENT,)
+
+        else:
+            return _base.UNKNOWN, node.nodeType  # detail is the raw nodeType value
+
+    def getFirstChild(self, node):
+        return node.firstChild
+
+    def getNextSibling(self, node):
+        return node.nextSibling
+
+    def getParentNode(self, node):
+        return node.parentNode
diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
new file mode 100644
index 00000000..13b03194
--- /dev/null
+++ b/html5lib/treewalkers/etree.py
@@ -0,0 +1,141 @@
+import gettext
+_ = gettext.gettext
+
+try:
+    from types import ModuleType
+except:
+    from new import module as ModuleType
+import copy
+import re
+
+import _base
+from html5lib.constants import voidElements
+
+tag_regexp = re.compile("{([^}]*)}(.*)")
+
+moduleCache = {}
+
+def getETreeModule(ElementTreeImplementation):  # Return a synthetic module holding a TreeWalker for this implementation.
+    name = "_" + ElementTreeImplementation.__name__+"builder"  # cache key and module name
+    if name in moduleCache:
+        return moduleCache[name]  # built once per implementation name
+    else:
+        mod = ModuleType("_" + ElementTreeImplementation.__name__+"builder")
+        objs = getETreeBuilder(ElementTreeImplementation)  # dict of that function's locals
+        mod.__dict__.update(objs)
+        moduleCache[name] = mod
+        return mod
+
+def getETreeBuilder(ElementTreeImplementation):  # Build the TreeWalker class for one ElementTree implementation.
+    ElementTree = ElementTreeImplementation
+
+    class TreeWalker(_base.NonRecursiveTreeWalker):
+        """Given the particular ElementTree representation, this implementation,
+        to avoid using recursion, returns "nodes" as tuples with the following
+        content:
+
+        1. The current element
+        
+        2. The index of the element relative to its parent
+        
+        3. A stack of ancestor elements
+        
+        4. A flag "text", "tail" or None to indicate if the current node is a
+           text node; either the text or tail of the current element (1)
+        """
+        def getNodeDetails(self, node):
+            if isinstance(node, tuple): # It might be the root Element
+                elt, key, parents, flag = node
+                if flag in ("text", "tail"):
+                    return _base.TEXT, getattr(elt, flag)  # the text lives on elt.text or elt.tail
+                else:
+                    node = elt
+
+            if not(hasattr(node, "tag")):
+                node = node.getroot()  # objects without .tag are asked for their root element
+
+            if node.tag in ("<DOCUMENT_ROOT>", "<DOCUMENT_FRAGMENT>"):
+                return (_base.DOCUMENT,)
+
+            elif node.tag == "<!DOCTYPE>":
+                return (_base.DOCTYPE, node.text, 
+                        node.get("publicId"), node.get("systemId"))
+
+            elif node.tag == ElementTree.Comment:
+                return _base.COMMENT, node.text
+
+            else:
+                assert type(node.tag) in (str, unicode), type(node.tag)
+                #This is assumed to be an ordinary element
+                match = tag_regexp.match(node.tag)  # splits "{namespace}tag" names
+                if match:
+                    namespace, tag = match.groups()
+                else:
+                    namespace = None
+                    tag = node.tag
+                attrs = {}
+                for name, value in node.attrib.items():
+                    match = tag_regexp.match(name)  # attribute names are split the same way
+                    if match:
+                        attrs[(match.group(1),match.group(2))] = value
+                    else:
+                        attrs[(None,name)] = value
+                return (_base.ELEMENT, namespace, tag, 
+                        attrs, len(node) or node.text)  # last item is truthy when there are children or text
+    
+        def getFirstChild(self, node):
+            if isinstance(node, tuple):
+                element, key, parents, flag = node
+            else:
+                element, key, parents, flag = node, None, [], None  # bare root: start a new ancestor stack
+                
+            if flag in ("text", "tail"):
+                return None  # text nodes have no children
+            else:
+                if element.text:
+                    return element, key, parents, "text"  # the element's own text comes first
+                elif len(element):
+                    parents.append(element)  # the ancestor stack is mutated in place
+                    return element[0], 0, parents, None
+                else:
+                    return None
+        
+        def getNextSibling(self, node):
+            if isinstance(node, tuple):
+                element, key, parents, flag = node
+            else:
+                return None  # the bare root has no siblings
+                
+            if flag == "text":
+                if len(element):
+                    parents.append(element)  # after the text comes the first child element
+                    return element[0], 0, parents, None
+                else:
+                    return None
+            else:
+                if element.tail and flag != "tail":
+                    return element, key, parents, "tail"  # an element is followed by its own tail text
+                elif key < len(parents[-1]) - 1:
+                    return parents[-1][key+1], key+1, parents, None
+                else:
+                    return None
+        
+        def getParentNode(self, node):
+            if isinstance(node, tuple):
+                element, key, parents, flag = node
+            else:
+                return None
+            
+            if flag == "text":
+                if not parents:
+                    return element  # text of the root: the parent is the bare root element
+                else:
+                    return element, key, parents, None
+            else:
+                parent = parents.pop()  # pops the shared ancestor stack
+                if not parents:
+                    return parent
+                else:
+                    return parent, list(parents[-1]).index(parent), parents, None  # index found by linear search
+
+    return locals()  # exposes TreeWalker (and ElementTree) to getETreeModule
diff --git a/html5lib/treewalkers/genshistream.py b/html5lib/treewalkers/genshistream.py
new file mode 100644
index 00000000..ef71a83e
--- /dev/null
+++ b/html5lib/treewalkers/genshistream.py
@@ -0,0 +1,70 @@
+from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
+from genshi.core  import  START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
+from genshi.output import NamespaceFlattener
+
+import _base
+
+from html5lib.constants import voidElements
+
+class TreeWalker(_base.TreeWalker):  # Walker that turns a Genshi event stream into html5lib tokens.
+    def __iter__(self):  # processes events one step behind, so tokens() can see the following event
+        depth = 0
+        ignore_until = None  # depth of a void element whose contents are being skipped
+        previous = None
+        for event in self.tree:
+            if previous is not None:
+                if previous[0] == START:
+                    depth += 1
+                if ignore_until is not None and ignore_until <= depth:  # explicit None test, same outcome
+                    ignore_until = None
+                if ignore_until is None:
+                    for token in self.tokens(previous, event):
+                        yield token
+                        if token["type"] == "EmptyTag":
+                            ignore_until = depth
+                if previous[0] == END:
+                    depth -= 1
+            previous = event
+        if previous is not None:  # flush the final event; an empty stream yields nothing
+            if ignore_until is None or ignore_until <= depth:
+                for token in self.tokens(previous, None):
+                    yield token
+            elif ignore_until is not None:
+                raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
+
+    def tokens(self, event, next):  # generator of tokens for one event; `next` is the following event or None
+        kind, data, pos = event
+        if kind == START:
+            tag, attrib = data
+            name = tag.localname
+            namespace = tag.namespace
+            if name in voidElements:  # test the local name, as the END branch below does
+                for token in self.emptyTag(namespace, name, list(attrib),  # NOTE(review): a list has no iteritems(); normalizeAttrs looks like it needs a dict - confirm
+                                           not next or next[0] != END 
+                                           or next[1] != tag):
+                    yield token
+            else:
+                yield self.startTag(namespace, name, list(attrib))
+
+        elif kind == END:
+            name = data.localname
+            namespace = data.namespace
+            if name not in voidElements:
+                yield self.endTag(namespace, name)
+
+        elif kind == COMMENT:
+            yield self.comment(data)
+
+        elif kind == TEXT:
+            for token in self.text(data):
+                yield token
+
+        elif kind == DOCTYPE:
+            yield self.doctype(*data)
+
+        elif kind in (XML_NAMESPACE, START_NS, END_NS, \
+          START_CDATA, END_CDATA, PI):
+            pass  # these event kinds produce no token
+
+        else:
+            yield self.unknown(kind)
diff --git a/html5lib/treewalkers/lxmletree.py b/html5lib/treewalkers/lxmletree.py
new file mode 100644
index 00000000..46f4908c
--- /dev/null
+++ b/html5lib/treewalkers/lxmletree.py
@@ -0,0 +1,186 @@
+from lxml import etree
+from html5lib.treebuilders.etree import tag_regexp
+
+from gettext import gettext
+_ = gettext
+
+import _base
+
+from html5lib.constants import voidElements
+from html5lib import ihatexml
+
+class Root(object):  # Synthetic document node over an lxml tree's top-level nodes.
+    def __init__(self, et):
+        self.elementtree = et
+        self.children = []
+        if et.docinfo.internalDTD:  # a Doctype child is added only when an internal DTD is reported
+            self.children.append(Doctype(self, et.docinfo.root_name, 
+                                         et.docinfo.public_id, 
+                                         et.docinfo.system_url))
+        root = et.getroot()
+        node = root
+
+        while node.getprevious() is not None:  # rewind to the first sibling of the root element
+            node = node.getprevious()
+        while node is not None:  # then collect every top-level sibling in order
+            self.children.append(node)
+            node = node.getnext()
+
+        self.text = None
+        self.tail = None
+    
+    def __getitem__(self, key):
+        return self.children[key]
+
+    def getnext(self):
+        return None  # the root has no sibling
+
+    def __len__(self):
+        return 1  # constant, independent of len(self.children)
+
+class Doctype(object):  # Synthetic doctype node owned by a Root.
+    def __init__(self, root_node, name, public_id, system_id):
+        self.root_node = root_node
+        self.name = name
+        self.public_id = public_id
+        self.system_id = system_id
+        
+        self.text = None
+        self.tail = None
+
+    def getnext(self):
+        return self.root_node.children[1]  # assumes this doctype is children[0] and a second child exists
+
+class FragmentRoot(Root):  # Root variant built from a list of fragment children.
+    def __init__(self, children):  # does not call Root.__init__, so no elementtree attribute is set
+        self.children = [FragmentWrapper(self, child) for child in children]
+        self.text = self.tail = None
+
+    def getnext(self):
+        return None
+
+class FragmentWrapper(object):  # Proxy giving one fragment child a uniform node interface.
+    def __init__(self, fragment_root, obj):
+        self.root_node = fragment_root
+        self.obj = obj  # the wrapped object; unknown attributes are forwarded to it
+        if hasattr(self.obj, 'text'):
+            self.text = self.obj.text
+        else:
+            self.text = None
+        if hasattr(self.obj, 'tail'):
+            self.tail = self.obj.tail
+        else:
+            self.tail = None
+        self.isstring = isinstance(obj, basestring)  # string children are reported as text by the walker
+        
+    def __getattr__(self, name):
+        return getattr(self.obj, name)  # only reached for attributes not set on the wrapper itself
+    
+    def getnext(self):  # next sibling comes from the root's child list, not from the wrapped object
+        siblings = self.root_node.children
+        idx = siblings.index(self)
+        if idx < len(siblings) - 1:
+            return siblings[idx + 1]
+        else:
+            return None
+
+    def __getitem__(self, key):
+        return self.obj[key]
+
+    def __nonzero__(self):
+        return bool(self.obj)
+
+    def getparent(self):
+        return None  # wrappers always report no parent
+
+    def __str__(self):
+        return str(self.obj)
+
+    def __unicode__(self):
+        return unicode(self.obj)
+
+    def __len__(self):
+        return len(self.obj)
+
+        
+class TreeWalker(_base.NonRecursiveTreeWalker):  # Walker over lxml trees; text nodes are (element, "text"|"tail") tuples.
+    def __init__(self, tree):
+        if hasattr(tree, "getroot"):
+            tree = Root(tree)  # objects with getroot() are wrapped in a synthetic document root
+        elif isinstance(tree, list):
+            tree = FragmentRoot(tree)  # a list is treated as fragment children
+        _base.NonRecursiveTreeWalker.__init__(self, tree)
+        self.filter = ihatexml.InfosetFilter()  # used below to convert element names via fromXmlName
+    def getNodeDetails(self, node):
+        if isinstance(node, tuple): # Text node
+            node, key = node
+            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
+            return _base.TEXT, getattr(node, key)
+
+        elif isinstance(node, Root):
+            return (_base.DOCUMENT,)
+
+        elif isinstance(node, Doctype):
+            return _base.DOCTYPE, node.name, node.public_id, node.system_id
+
+        elif isinstance(node, FragmentWrapper) and node.isstring:
+            return _base.TEXT, node  # the wrapper itself is passed as the text data
+
+        elif node.tag == etree.Comment:
+            return _base.COMMENT, node.text
+
+        elif node.tag == etree.Entity:
+            return _base.ENTITY, node.text[1:-1] # strip &;
+
+        else:
+            #This is assumed to be an ordinary element
+            match = tag_regexp.match(node.tag)  # splits "{namespace}tag" names
+            if match:
+                namespace, tag = match.groups()
+            else:
+                namespace = None
+                tag = node.tag
+            attrs = {}
+            for name, value in node.attrib.items():
+                match = tag_regexp.match(name)
+                if match:
+                    attrs[(match.group(1),match.group(2))] = value
+                else:
+                    attrs[(None,name)] = value
+            return (_base.ELEMENT, namespace, self.filter.fromXmlName(tag), 
+                    attrs, len(node) > 0 or node.text)  # last item is truthy when there are children or text
+
+    def getFirstChild(self, node):
+        assert not isinstance(node, tuple), _("Text nodes have no children")
+
+        assert len(node) or node.text, "Node has no children"
+        if node.text:
+            return (node, "text")  # the element's own text comes before its child elements
+        else:
+            return node[0]
+
+    def getNextSibling(self, node):
+        if isinstance(node, tuple): # Text node
+            node, key = node
+            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
+            if key == "text":
+                # XXX: we cannot use a "bool(node) and node[0] or None" construct here
+                # because node[0] might evaluate to False if it has no child element
+                if len(node):
+                    return node[0]
+                else:
+                    return None
+            else: # tail
+                return node.getnext()
+
+        return node.tail and (node, "tail") or node.getnext()  # tail text, if any, comes before the next element
+
+    def getParentNode(self, node):
+        if isinstance(node, tuple): # Text node
+            node, key = node
+            assert key in ("text", "tail"), _("Text nodes are text or tail, found %s") % key
+            if key == "text":
+                return node  # "text" belongs to the element itself
+            # else: fallback to "normal" processing
+
+        return node.getparent()
diff --git a/html5lib/treewalkers/pulldom.py b/html5lib/treewalkers/pulldom.py
new file mode 100644
index 00000000..1f8b95b8
--- /dev/null
+++ b/html5lib/treewalkers/pulldom.py
@@ -0,0 +1,60 @@
+from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
+    COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
+
+import _base
+
+from html5lib.constants import voidElements
+
+class TreeWalker(_base.TreeWalker):
+    """Walk an ``xml.dom.pulldom`` style stream of ``(event type, node)`` pairs."""
+    def __iter__(self):
+        ignore_until = None  # node of an "EmptyTag" whose following events are skipped
+        previous = None
+        for event in self.tree:
+            if previous is not None and \
+              (ignore_until is None or previous[1] is ignore_until):
+                if previous[1] is ignore_until:
+                    ignore_until = None
+                for token in self.tokens(previous, event):
+                    yield token
+                    if token["type"] == "EmptyTag":
+                        ignore_until = previous[1]
+            previous = event
+        # previous stays None for an empty stream: nothing is left to flush.
+        if previous is not None and \
+          (ignore_until is None or previous[1] is ignore_until):
+            for token in self.tokens(previous, None):
+                yield token
+        elif ignore_until is not None:
+            raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
+
+    def tokens(self, event, next):
+        """Yield the tokens for ``event``; ``next`` is the event after it, or None."""
+        event_type, node = event
+        if event_type == START_ELEMENT:
+            name = node.nodeName
+            namespace = node.namespaceURI
+            attrs = {}
+            for attr in node.attributes.keys():
+                attr = node.getAttributeNode(attr)
+                attrs[(attr.namespaceURI,attr.localName)] = attr.value
+            if name in voidElements:
+                for token in self.emptyTag(namespace,
+                                           name,
+                                           attrs,
+                                           not next or next[1] is not node):
+                    yield token
+            else:
+                yield self.startTag(namespace, name, attrs)
+        elif event_type == END_ELEMENT:
+            name = node.nodeName
+            namespace = node.namespaceURI
+            if name not in voidElements:
+                yield self.endTag(namespace, name)
+        elif event_type == COMMENT:
+            yield self.comment(node.nodeValue)
+        elif event_type in (IGNORABLE_WHITESPACE, CHARACTERS):
+            for token in self.text(node.nodeValue):
+                yield token
+        else:
+            yield self.unknown(event_type)
diff --git a/html5lib/treewalkers/simpletree.py b/html5lib/treewalkers/simpletree.py
new file mode 100644
index 00000000..9e6bd4c5
--- /dev/null
+++ b/html5lib/treewalkers/simpletree.py
@@ -0,0 +1,78 @@
+import gettext
+_ = gettext.gettext
+
+import _base
+
+class TreeWalker(_base.NonRecursiveTreeWalker):
+    """Given that simpletree has no performant way of getting a node's
+    next sibling, this implementation returns "nodes" as tuples with the
+    following content:
+
+    1. The parent Node (Element, Document or DocumentFragment)
+
+    2. The child index of the current node in its parent's children list
+
+    3. A list used as a stack of all ancestors. It is a pair tuple whose
+       first item is a parent Node and second item is a child index.
+    """
+
+    def getNodeDetails(self, node):
+        """Return a tuple of a ``_base`` node-kind constant and the node's details."""
+        if isinstance(node, tuple): # It might be the root Node
+            parent, idx, parents = node
+            node = parent.childNodes[idx]
+        # testing node.type allows us not to import treebuilders.simpletree
+        if node.type in (1, 2): # Document or DocumentFragment
+            return (_base.DOCUMENT,)
+
+        elif node.type == 3: # DocumentType
+            return _base.DOCTYPE, node.name, node.publicId, node.systemId
+
+        elif node.type == 4: # TextNode
+            return _base.TEXT, node.value
+
+        elif node.type == 5: # Element
+            attrs = {}
+            for name, value in node.attributes.items():
+                if isinstance(name, tuple):
+                    attrs[(name[2],name[1])] = value
+                else:
+                    attrs[(None,name)] = value
+            return (_base.ELEMENT, node.namespace, node.name,
+                    attrs, node.hasContent())
+
+        elif node.type == 6: # CommentNode
+            return _base.COMMENT, node.data
+        else:
+            # UNKNOWN comes from _base like every other node kind used here
+            return _base.UNKNOWN, node.type
+
+    def getFirstChild(self, node):
+        """Return the tuple for the first child of ``node``; it must have content."""
+        if isinstance(node, tuple): # It might be the root Node
+            parent, idx, parents = node
+            parents.append((parent, idx))
+            node = parent.childNodes[idx]
+        else:
+            parents = []
+        assert node.hasContent(), "Node has no children"
+        return (node, 0, parents)
+
+    def getNextSibling(self, node):
+        """Return the tuple for the next child of the same parent, or None."""
+        assert isinstance(node, tuple), "Node is not a tuple: " + str(node)
+        parent, idx, parents = node
+        idx += 1
+        if len(parent.childNodes) > idx:
+            return (parent, idx, parents)
+        return None
+
+    def getParentNode(self, node):
+        """Pop the ancestor stack and return the parent's tuple, or None at the top."""
+        assert isinstance(node, tuple)
+        parent, idx, parents = node
+        if parents:
+            parent, idx = parents.pop()
+            return parent, idx, parents
+        # HACK: We could return ``parent`` but None will stop the algorithm the same way
+        return None
diff --git a/html5lib/treewalkers/soup.py b/html5lib/treewalkers/soup.py
new file mode 100644
index 00000000..fca65ecb
--- /dev/null
+++ b/html5lib/treewalkers/soup.py
@@ -0,0 +1,60 @@
+import re
+import gettext
+_ = gettext.gettext
+
+from BeautifulSoup import BeautifulSoup, Declaration, Comment, Tag
+from html5lib.constants import namespaces
+import _base
+
+class TreeWalker(_base.NonRecursiveTreeWalker):
+    doctype_regexp = re.compile(
+        r'DOCTYPE\s+(?P<name>[^\s]*)(\s*PUBLIC\s*"(?P<publicId>.*)"\s*"(?P<systemId1>.*)"|\s*SYSTEM\s*"(?P<systemId2>.*)")?')
+    def getNodeDetails(self, node):
+        """Classify a BeautifulSoup node: return its ``_base`` kind constant plus details."""
+        if isinstance(node, BeautifulSoup): # Document or DocumentFragment
+            return (_base.DOCUMENT,)
+        elif isinstance(node, Declaration): # DocumentType
+            string = unicode(node.string)
+            #Slice needed to remove markup added during unicode conversion,
+            #but only in some versions of BeautifulSoup/Python
+            if string.startswith('<!') and string.endswith('>'):
+                string = string[2:-1]
+            m = self.doctype_regexp.match(string)
+            #This regexp approach seems wrong and fragile,
+            #but BeautifulSoup stores the doctype as a single string and we want the separate bits.
+            #It should work as long as the tree is created by html5lib itself but may be wrong
+            #if the tree has been modified at all.
+            #We could just feed it to an html5lib tokenizer, I guess...
+            assert m is not None, "DOCTYPE did not match expected format"
+
+            name = m.group('name')
+            publicId = m.group('publicId')
+            if publicId is not None:
+                systemId = m.group('systemId1')
+            else:
+                systemId = m.group('systemId2')
+            return _base.DOCTYPE, name, publicId or "", systemId or ""
+
+        elif isinstance(node, Comment):
+            string = unicode(node.string)
+            if string.startswith('<!--') and string.endswith('-->'):
+                string = string[4:-3]
+            return _base.COMMENT, string
+        elif isinstance(node, unicode): # TextNode
+            return _base.TEXT, node
+        elif isinstance(node, Tag): # Element
+            # NOTE(review): attrs are passed as a list of (name, value) pairs, while the
+            # sibling walkers pass a dict keyed by (namespace, name) -- confirm _base copes.
+            return (_base.ELEMENT, namespaces["html"], node.name,
+                    dict(node.attrs).items(), node.contents)
+        else:
+            return _base.UNKNOWN, node.__class__.__name__
+
+    def getFirstChild(self, node):
+        return node.contents[0]
+
+    def getNextSibling(self, node):
+        return node.nextSibling
+
+    def getParentNode(self, node):
+        return node.parent
diff --git a/html5lib/utils.py b/html5lib/utils.py
new file mode 100644
index 00000000..d53f6788
--- /dev/null
+++ b/html5lib/utils.py
@@ -0,0 +1,175 @@
+try:
+    frozenset
+except NameError:
+    #Import from the sets module for python 2.3
+    from sets import Set as set
+    from sets import ImmutableSet as frozenset
+
+class MethodDispatcher(dict):
+    """Dict with 2 special properties:
+
+    On initialisation, keys that are lists, sets or tuples are converted to
+    multiple keys so accessing any one of the items in the original
+    list-like object returns the matching value. ``items`` must be an
+    iterable of (key, value) pairs, not a dict:
+    md = MethodDispatcher([(("foo", "bar"), "baz")])
+    md["foo"] == "baz"
+
+    Missing keys return the ``default`` attribute (initially None).
+    """
+
+    def __init__(self, items=()):
+        # Using _dictEntries instead of directly assigning to self is about
+        # twice as fast. Please do careful performance testing before changing
+        # anything here.
+        _dictEntries = []
+        for name,value in items:
+            if type(name) in (list, tuple, frozenset, set):
+                for item in name:
+                    _dictEntries.append((item, value))
+            else:
+                _dictEntries.append((name, value))
+        dict.__init__(self, _dictEntries)
+        self.default = None
+
+    def __getitem__(self, key):
+        return dict.get(self, key, self.default)
+
+#Pure python implementation of deque taken from the ASPN Python Cookbook
+#Original code by Raymond Hettinger
+
+class deque(object):
+    """Pure python double-ended queue; items live in a dict keyed by position."""
+    def __init__(self, iterable=(), maxsize=-1):
+        if not hasattr(self, 'data'):  # may be re-run on a live object (see __deepcopy__)
+            self.left = self.right = 0
+            self.data = {}
+        self.maxsize = maxsize
+        self.extend(iterable)
+
+    def append(self, x):
+        self.data[self.right] = x
+        self.right += 1
+        if self.maxsize != -1 and len(self) > self.maxsize:
+            self.popleft()
+
+    def appendleft(self, x):
+        self.left -= 1
+        self.data[self.left] = x
+        if self.maxsize != -1 and len(self) > self.maxsize:
+            self.pop()
+
+    def pop(self):
+        if self.left == self.right:
+            raise IndexError('cannot pop from empty deque')
+        self.right -= 1
+        elem = self.data[self.right]
+        del self.data[self.right]
+        return elem
+
+    def popleft(self):
+        if self.left == self.right:
+            raise IndexError('cannot pop from empty deque')
+        elem = self.data[self.left]
+        del self.data[self.left]
+        self.left += 1
+        return elem
+
+    def clear(self):
+        self.data.clear()
+        self.left = self.right = 0
+
+    def extend(self, iterable):
+        for elem in iterable:
+            self.append(elem)
+
+    def extendleft(self, iterable):
+        for elem in iterable:
+            self.appendleft(elem)
+
+    def rotate(self, n=1):
+        if self:
+            n %= len(self)
+            for i in xrange(n):
+                self.appendleft(self.pop())
+
+    def __getitem__(self, i):
+        if i < 0:
+            i += len(self)
+        try:
+            return self.data[i + self.left]
+        except KeyError:
+            raise IndexError
+
+    def __setitem__(self, i, value):
+        if i < 0:
+            i += len(self)
+        if not (0 <= i < len(self)):  # dict assignment never raises KeyError
+            raise IndexError
+        self.data[i + self.left] = value
+
+    def __delitem__(self, i):
+        size = len(self)
+        if not (-size <= i < size):
+            raise IndexError
+        if i < 0:
+            i += size
+        for j in xrange(self.left+i, self.right-1):  # shift the tail down one slot
+            self.data[j] = self.data[j+1]
+        self.pop()
+
+    def __len__(self):
+        return self.right - self.left
+
+    def __cmp__(self, other):
+        if type(self) != type(other):
+            return cmp(type(self), type(other))
+        return cmp(list(self), list(other))
+
+    def __repr__(self, _track=[]):  # _track is shared on purpose: self-reference guard
+        if id(self) in _track:
+            return '...'
+        _track.append(id(self))
+        r = 'deque(%r)' % (list(self),)
+        _track.remove(id(self))
+        return r
+
+    def __getstate__(self):
+        return (tuple(self),)
+
+    def __setstate__(self, s):
+        self.__init__(s[0])
+
+    def __hash__(self):
+        raise TypeError
+
+    def __copy__(self):
+        return self.__class__(self)
+
+    def __deepcopy__(self, memo=None):
+        from copy import deepcopy
+        if memo is None:  # a shared {} default would leak entries between calls
+            memo = {}
+        result = self.__class__()
+        memo[id(self)] = result
+        result.__init__(deepcopy(tuple(self), memo))
+        return result
+
+#Some utility functions to deal with weirdness around UCS2 vs UCS4
+#python builds
+
+def encodingType():
+    """Return "UCS2" on a narrow Unicode build of Python, otherwise "UCS4"."""
+    if len(u"\U0010FFFF") == 2:  # an astral char is two code units on narrow builds
+        return "UCS2"
+    return "UCS4"
+
+def isSurrogatePair(data):
+    """True when data is exactly a high surrogate followed by a low surrogate."""
+    return (len(data) == 2 and 0xD800 <= ord(data[0]) <= 0xDBFF and
+            0xDC00 <= ord(data[1]) <= 0xDFFF)
+
+def surrogatePairToCodepoint(data):
+    """Combine a high/low surrogate pair into the code point it encodes."""
+    high, low = ord(data[0]) - 0xD800, ord(data[1]) - 0xDC00
+    return 0x10000 + high * 0x400 + low

From f2b6e3d6da7db3e09bfd3ea8f143764af2da5184 Mon Sep 17 00:00:00 2001
From: Ade <ade_hall@yahoo.co.uk>
Date: Mon, 24 Sep 2012 23:28:17 +1200
Subject: [PATCH 5/6] Check for cuesheet

Attempt to allow single track .wav with cuesheet.
+ Small tidy up
---
 headphones/searcher.py           |  2 +-
 headphones/searcher_rutracker.py | 57 ++++++++++++++++++++++++--------
 2 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/headphones/searcher.py b/headphones/searcher.py
index 643b6afa..98261cb4 100644
--- a/headphones/searcher.py
+++ b/headphones/searcher.py
@@ -848,7 +848,7 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
             
             if rulist:
                 for ru in rulist:
-                    title = ru[0]
+                    title = ru[0].decode('utf-8')
                     size = ru[1]
                     url = ru[2]
                     resultlist.append((title, size, url, provider))
diff --git a/headphones/searcher_rutracker.py b/headphones/searcher_rutracker.py
index 313bab4c..b730038e 100644
--- a/headphones/searcher_rutracker.py
+++ b/headphones/searcher_rutracker.py
@@ -3,7 +3,6 @@
 
 # Headphones rutracker.org search
 # Functions called from searcher.py
-# Requires BeautifulSoup 4 for parsing http://www.crummy.com/software/BeautifulSoup/
 
 import urllib
 import urllib2
@@ -18,7 +17,7 @@ class Rutracker():
 
     logged_in = False
     # Stores a number of login attempts to prevent recursion.
-    login_counter = 0
+    #login_counter = 0
     
     def __init__(self):
 
@@ -34,7 +33,7 @@ class Rutracker():
         if login is None or password is None:
             return False
 
-        self.login_counter += 1
+        #self.login_counter += 1
         
         # No recursion wanted.
         #if self.login_counter > 1:
@@ -104,7 +103,7 @@ class Rutracker():
         try:
             
             page = self.opener.open(searchurl, timeout=60)
-            soup = BeautifulSoup(page.read(), from_encoding="utf-8")
+            soup = BeautifulSoup(page.read())
             
             # Debug
             #logger.debug (soup.prettify()) 
@@ -162,7 +161,7 @@ class Rutracker():
        
         for torrent in torrentlist:
             
-            title = torrent[0]
+            title = torrent[0].encode('utf-8')
             url = torrent[1]
             seeders = torrent[2]
             size = torrent[3]
@@ -179,8 +178,8 @@ class Rutracker():
                 self.cookiejar.set_cookie(cookielib.Cookie(version=0, name='bb_dl', value=torrent_id, port=None, port_specified=False, domain='.rutracker.org', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False))
                                           
                 # Debug
-                for cookie in self.cookiejar:
-                    logger.debug ('Cookie: %s' % cookie) 
+                #for cookie in self.cookiejar:
+                #    logger.debug ('Cookie: %s' % cookie) 
                      
                 try:
                     page = self.opener.open(url)
@@ -193,9 +192,10 @@ class Rutracker():
                     logger.error('Error getting torrent: %s' % e)  
                     return False      
                 
-                # get torrent track count
+                # get torrent track count and check for cue
                 
                 trackcount = 0
+                cuecount = 0
                 
                 if 'files' in metainfo: # multi
                     for pathfile in metainfo['files']:
@@ -203,17 +203,48 @@ class Rutracker():
                         for file in path:
                             if '.ape' in file or '.flac' in file or '.ogg' in file or '.m4a' in file or '.aac' in file or '.mp3' in file or '.wav' in file or '.aif' in file:
                                 trackcount += 1
-                            
-                logger.debug ('torrent title: %s' % title)
-                logger.debug ('hp trackcount: %s' % hptrackcount) 
-                logger.debug ('torrent trackcount: %s' % trackcount)
-                
+                            if '.cue' in file:
+                                cuecount += 1
+                                     
                 #Torrent topic page
         
                 topicurl = 'http://rutracker.org/forum/viewtopic.php?t=' + torrent_id
+                logger.debug ('torrent title: %s' % title)
+                logger.debug ('headphones trackcount: %s' % hptrackcount) 
+                logger.debug ('rutracker trackcount: %s' % trackcount)
+
+                # If torrent track count less than headphones track count, and there's a cue, then attempt to get track count from log(s)
+                # This is for the case where we have a single .flac/.wav which can be split by cue
+                # Not great, but shouldn't be doing this too often
+                
+                totallogcount = 0
+                if trackcount < hptrackcount and cuecount > 0 and cuecount < hptrackcount:
+                    page = self.opener.open(topicurl, timeout=60)
+                    soup = BeautifulSoup(page.read())
+                    findtoc = soup.find_all(text='TOC of the extracted CD')
+                    if not findtoc:
+                        findtoc = soup.find_all(text='TOC извлечённого CD')
+                    for toc in findtoc:
+                        logcount = 0
+                        for toccontent in toc.find_all_next(text=True):
+                            cut_string = toccontent.split('|')
+                            new_string = cut_string[0].lstrip().rstrip()
+                            if new_string == '1' or new_string == '01':
+                                logcount = 1
+                            elif logcount > 0:
+                                if new_string.isdigit():
+                                    logcount += 1
+                                else:
+                                    break
+                        totallogcount = totallogcount + logcount
+                            
+                if totallogcount > 0:
+                    trackcount = totallogcount        
+                    logger.debug ('rutracker logtrackcount: %s' % totallogcount)
                 
                 # If torrent track count = hp track count then return torrent, 
                 # if greater, check for deluxe/special/foreign editions
+                # if less, then allow if it's a single track with a cue
                 
                 valid = False
                 

From 31eaf1e416d563d97a121ee597bec0124d01c2e1 Mon Sep 17 00:00:00 2001
From: Ade <ade_hall@yahoo.co.uk>
Date: Wed, 26 Sep 2012 19:49:49 +1200
Subject: [PATCH 6/6] Small tidy up

---
 headphones/searcher.py           | 60 ++++++++++++++++++--------------
 headphones/searcher_rutracker.py | 11 +++---
 2 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/headphones/searcher.py b/headphones/searcher.py
index 98261cb4..5c48b816 100644
--- a/headphones/searcher.py
+++ b/headphones/searcher.py
@@ -822,39 +822,47 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
         if headphones.RUTRACKER and rulogin:
         
             provider = "rutracker.org"
-            bitrate = False
             
-            if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
-                format = 'lossless'
-                maxsize = 10000000000
-            elif headphones.PREFERRED_QUALITY == 1:
-                format = 'lossless+mp3'
-                maxsize = 10000000000
+            # Ignore if release date not specified, results too unpredictable
+            
+            if not year:
+                logger.info(u'Release date not specified, ignoring for rutracker.org')
             else:
-                format = 'mp3'
-                maxsize = 300000000
-                if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE:
-                    bitrate = True
+            
+                bitrate = False
+            
+                if headphones.PREFERRED_QUALITY == 3 or losslessOnly:
+                    format = 'lossless'
+                    maxsize = 10000000000
+                elif headphones.PREFERRED_QUALITY == 1:
+                    format = 'lossless+mp3'
+                    maxsize = 10000000000
+                else:
+                    format = 'mp3'
+                    maxsize = 300000000
+                    if headphones.PREFERRED_QUALITY == 2 and headphones.PREFERRED_BITRATE:
+                        bitrate = True
                 
-            # build search url based on above
+                # build search url based on above
             
-            searchURL = rutracker.searchurl(artistterm, albumterm, year, format)
-            logger.info(u'Parsing results from <a href="%s">rutracker.org</a>' % searchURL)
+                searchURL = rutracker.searchurl(artistterm, albumterm, year, format)
+                logger.info(u'Parsing results from <a href="%s">rutracker.org</a>' % searchURL)
             
-            # parse results and get best match
-            rulist = rutracker.search(searchURL, maxsize, minimumseeders, albumid, bitrate)
+                # parse results and get best match
             
-            # add best match to overall results list
+                rulist = rutracker.search(searchURL, maxsize, minimumseeders, albumid, bitrate)
             
-            if rulist:
-                for ru in rulist:
-                    title = ru[0].decode('utf-8')
-                    size = ru[1]
-                    url = ru[2]
-                    resultlist.append((title, size, url, provider))
-                    logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
-            else:
-                logger.info(u"No valid results found from %s" % (provider))
+                # add best match to overall results list
+            
+                if rulist:
+                    for ru in rulist:
+                        title = ru[0].decode('utf-8')
+                        size = ru[1]
+                        url = ru[2]
+                        resultlist.append((title, size, url, provider))
+                        logger.info('Found %s. Size: %s' % (title, helpers.bytes_to_mb(size)))
+                else:
+                    logger.info(u"No valid results found from %s" % (provider))
                 
 
         if headphones.ISOHUNT:
diff --git a/headphones/searcher_rutracker.py b/headphones/searcher_rutracker.py
index b730038e..8bde0d1a 100644
--- a/headphones/searcher_rutracker.py
+++ b/headphones/searcher_rutracker.py
@@ -161,15 +161,16 @@ class Rutracker():
        
         for torrent in torrentlist:
             
-            title = torrent[0].encode('utf-8')
+            returntitle = torrent[0].encode('utf-8')
             url = torrent[1]
             seeders = torrent[2]
             size = torrent[3]
             
             # Attempt to filter out unwanted
             
-            if 'Promo' not in title and 'promo' not in title and 'Vinyl' not in title and 'vinyl' not in title \
-              and 'ongbook' not in title and 'TVRip' not in title and 'HDTV' not in title and 'DVD' not in title \
+            title = returntitle.lower()
+            
+            if 'promo' not in title and 'vinyl' not in title and 'songbook' not in title and 'tvrip' not in title and 'hdtv' not in title and 'dvd' not in title \
               and int(size) <= maxsize and int(seeders) >= minseeders:
                      
                 # Check torrent info
@@ -251,13 +252,13 @@ class Rutracker():
                 if trackcount == hptrackcount:
                     valid = True
                 elif trackcount > hptrackcount:
-                    if 'eluxe' in title or 'dition' in title or 'apanese' in title or 'elease' in title:
+                    if 'deluxe' in title or 'edition' in title or 'japanese' in title:
                         valid = True
                         
                 # return 1st valid torrent if not checking by bitrate, else add to list and return at end
                 
                 if valid:
-                    rulist.append((title, size, topicurl))
+                    rulist.append((returntitle, size, topicurl))
                     if not bitrate:
                         return rulist