From 7c068ca4c57a8db84422276551c6fb247a7567ad Mon Sep 17 00:00:00 2001
From: rembo10
Date: Fri, 28 Mar 2014 11:25:17 -0700
Subject: [PATCH 001/112] Updated the 'maintaining db' log message
---
headphones/importer.py | 2 +-
headphones/mb.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/headphones/importer.py b/headphones/importer.py
index a7481f76..1d9b2260 100644
--- a/headphones/importer.py
+++ b/headphones/importer.py
@@ -210,7 +210,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
logger.info("[%s] Removing all references to release group %s to reflect MusicBrainz" % (artist['artist_name'], items['AlbumID']))
force_repackage = 1
else:
- logger.info("[%s] Error pulling data from MusicBrainz: Maintaining dB" % artist['artist_name'])
+ logger.info("[%s] There was either an error pulling data from MusicBrainz or there might not be any releases for this category" % artist['artist_name'])
# Then search for releases within releasegroups, if releases don't exist, then remove from allalbums/alltracks
diff --git a/headphones/mb.py b/headphones/mb.py
index e7629bba..b45dcfe1 100644
--- a/headphones/mb.py
+++ b/headphones/mb.py
@@ -373,7 +373,7 @@ def get_new_releases(rgid,includeExtras=False,forcefull=False):
logger.info("Removing all references to release %s to reflect MusicBrainz" % items['ReleaseID'])
force_repackage1 = 1
else:
- logger.info("Error pulling data from MusicBrainz: Maintaining dB")
+ logger.info("There was either an error pulling data from MusicBrainz or there might not be any releases for this category")
num_new_releases = 0
From e293302bfe4e3bc5a755e4d915ce35f5668bacbe Mon Sep 17 00:00:00 2001
From: rembo10
Date: Fri, 28 Mar 2014 11:42:14 -0700
Subject: [PATCH 002/112] Catch an exception where lyrics can't be saved due to
encoding error
---
headphones/postprocessor.py | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py
index 492b69c0..d03ac443 100644
--- a/headphones/postprocessor.py
+++ b/headphones/postprocessor.py
@@ -798,8 +798,12 @@ def embedLyrics(downloaded_track_list):
if lyrics:
logger.debug('Adding lyrics to: %s' % downloaded_track.decode(headphones.SYS_ENCODING, 'replace'))
f.lyrics = metalyrics
- f.save()
-
+ try:
+ f.save()
+ except:
+ logger.error('Cannot save lyrics to: %s. Skipping' % downloaded_track.decode(headphones.SYS_ENCODING, 'replace'))
+ continue
+
def renameFiles(albumpath, downloaded_track_list, release):
logger.info('Renaming files')
try:
From dbe644aee2bcbb8175d2f0fffe0ddd896b4501ff Mon Sep 17 00:00:00 2001
From: rembo10
Date: Fri, 28 Mar 2014 18:26:19 -0700
Subject: [PATCH 003/112] Possible fix for album art permissions not being
updated
---
headphones/postprocessor.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/headphones/postprocessor.py b/headphones/postprocessor.py
index d03ac443..d1d801a5 100644
--- a/headphones/postprocessor.py
+++ b/headphones/postprocessor.py
@@ -400,7 +400,7 @@ def doPostProcessing(albumid, albumpath, release, tracks, downloaded_track_list,
myDB = db.DBConnection()
myDB.action('UPDATE albums SET status = "Downloaded" WHERE AlbumID=?', [albumid])
myDB.action('UPDATE snatched SET status = "Processed" WHERE AlbumID=?', [albumid])
-
+
# Update the have tracks for all created dirs:
for albumpath in albumpaths:
librarysync.libraryScan(dir=albumpath, append=True, ArtistID=release['ArtistID'], ArtistName=release['ArtistName'])
@@ -500,11 +500,11 @@ def addAlbumArt(artwork, albumpath, release):
if album_art_name.startswith('.'):
album_art_name = album_art_name.replace(0, '_')
- prev = os.umask(headphones.UMASK)
+ #prev = os.umask(headphones.UMASK)
file = open(os.path.join(albumpath, album_art_name), 'wb')
file.write(artwork)
file.close()
- os.umask(prev)
+ #os.umask(prev)
def cleanupFiles(albumpath):
logger.info('Cleaning up files')
@@ -799,11 +799,11 @@ def embedLyrics(downloaded_track_list):
logger.debug('Adding lyrics to: %s' % downloaded_track.decode(headphones.SYS_ENCODING, 'replace'))
f.lyrics = metalyrics
try:
- f.save()
- except:
- logger.error('Cannot save lyrics to: %s. Skipping' % downloaded_track.decode(headphones.SYS_ENCODING, 'replace'))
- continue
-
+ f.save()
+ except:
+ logger.error('Cannot save lyrics to: %s. Skipping' % downloaded_track.decode(headphones.SYS_ENCODING, 'replace'))
+ continue
+
def renameFiles(albumpath, downloaded_track_list, release):
logger.info('Renaming files')
try:
From 6e2338184e736aec0e1b3020abc7c6ec4a15a636 Mon Sep 17 00:00:00 2001
From: Ade
Date: Sun, 30 Mar 2014 16:32:48 +1300
Subject: [PATCH 004/112] Small bug fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- Delete from Releases when deleting artist/album
- Searcher - Size limits not quite working
- Searcher - 1st newznab used even if disabled
- Rutracker search stopped working for me; fixed by updating Beautiful
Soup. Moved bs4 and html5lib to lib and ensured (I think) they are imported
from the right place
---
Headphones.py | 8 +
headphones/importer.py | 2 +
headphones/searcher.py | 10 +-
headphones/webserve.py | 32 +
html5lib/__init__.py | 17 -
html5lib/constants.py | 3085 --
html5lib/filters/formfiller.py | 127 -
html5lib/filters/inject_meta_charset.py | 62 -
html5lib/filters/sanitizer.py | 8 -
html5lib/ihatexml.py | 177 -
html5lib/sanitizer.py | 258 -
html5lib/serializer/__init__.py | 17 -
html5lib/serializer/xhtmlserializer.py | 9 -
html5lib/tests/__init__.py | 12 -
html5lib/tests/mockParser.py | 37 -
html5lib/tests/runparsertests.py | 27 -
html5lib/tests/runtests.py | 20 -
html5lib/tests/support.py | 127 -
html5lib/tests/test_encoding.py | 54 -
html5lib/tests/test_formfiller.py | 296 -
html5lib/tests/test_parser.py | 140 -
html5lib/tests/test_parser2.py | 39 -
html5lib/tests/test_sanitizer.py | 76 -
html5lib/tests/test_serializer.py | 180 -
html5lib/tests/test_stream.py | 97 -
html5lib/tests/test_tokenizer.py | 193 -
html5lib/tests/test_treewalkers.py | 311 -
html5lib/tests/test_whitespace_filter.py | 123 -
.../tests/testdata/encoding/test-yahoo-jp.dat | 10 -
html5lib/tests/testdata/encoding/tests1.dat | 394 -
html5lib/tests/testdata/encoding/tests2.dat | 115 -
html5lib/tests/testdata/sanitizer/tests1.dat | 501 -
html5lib/tests/testdata/serializer/core.test | 125 -
.../tests/testdata/serializer/injectmeta.test | 66 -
.../testdata/serializer/optionaltags.test | 965 -
.../tests/testdata/serializer/options.test | 60 -
.../tests/testdata/serializer/whitespace.test | 51 -
.../tests/testdata/sniffer/htmlOrFeed.json | 43 -
.../testdata/tokenizer/contentModelFlags.test | 75 -
html5lib/tests/testdata/tokenizer/domjs.test | 90 -
.../tests/testdata/tokenizer/entities.test | 283 -
.../tests/testdata/tokenizer/escapeFlag.test | 33 -
.../testdata/tokenizer/namedEntities.test | 44189 ----------------
.../testdata/tokenizer/numericEntities.test | 1313 -
.../tokenizer/pendingSpecChanges.test | 7 -
html5lib/tests/testdata/tokenizer/test1.test | 196 -
html5lib/tests/testdata/tokenizer/test2.test | 179 -
html5lib/tests/testdata/tokenizer/test3.test | 6047 ---
html5lib/tests/testdata/tokenizer/test4.test | 344 -
.../testdata/tokenizer/unicodeChars.test | 1295 -
.../tokenizer/unicodeCharsProblematic.test | 27 -
.../testdata/tokenizer/xmlViolation.test | 22 -
.../testdata/tree-construction/adoption01.dat | 194 -
.../testdata/tree-construction/adoption02.dat | 31 -
.../testdata/tree-construction/comments01.dat | 135 -
.../testdata/tree-construction/doctype01.dat | 370 -
.../tree-construction/domjs-unsafe.dat | Bin 6639 -> 0 bytes
.../testdata/tree-construction/entities01.dat | 603 -
.../testdata/tree-construction/entities02.dat | 249 -
.../tree-construction/html5test-com.dat | 246 -
.../testdata/tree-construction/inbody01.dat | 43 -
.../testdata/tree-construction/isindex.dat | 40 -
...pending-spec-changes-plain-text-unsafe.dat | Bin 115 -> 0 bytes
.../pending-spec-changes.dat | 52 -
.../tree-construction/plain-text-unsafe.dat | Bin 4166 -> 0 bytes
.../tree-construction/scriptdata01.dat | 308 -
.../testdata/tree-construction/tables01.dat | 212 -
.../testdata/tree-construction/tests1.dat | 1952 -
.../testdata/tree-construction/tests10.dat | 799 -
.../testdata/tree-construction/tests11.dat | 482 -
.../testdata/tree-construction/tests12.dat | 62 -
.../testdata/tree-construction/tests14.dat | 74 -
.../testdata/tree-construction/tests15.dat | 208 -
.../testdata/tree-construction/tests16.dat | 2299 -
.../testdata/tree-construction/tests17.dat | 153 -
.../testdata/tree-construction/tests18.dat | 269 -
.../testdata/tree-construction/tests19.dat | 1237 -
.../testdata/tree-construction/tests2.dat | 763 -
.../testdata/tree-construction/tests20.dat | 455 -
.../testdata/tree-construction/tests21.dat | 221 -
.../testdata/tree-construction/tests22.dat | 157 -
.../testdata/tree-construction/tests23.dat | 155 -
.../testdata/tree-construction/tests24.dat | 79 -
.../testdata/tree-construction/tests25.dat | 219 -
.../testdata/tree-construction/tests26.dat | 313 -
.../testdata/tree-construction/tests3.dat | 305 -
.../testdata/tree-construction/tests4.dat | 59 -
.../testdata/tree-construction/tests5.dat | 191 -
.../testdata/tree-construction/tests6.dat | 663 -
.../testdata/tree-construction/tests7.dat | 390 -
.../testdata/tree-construction/tests8.dat | 148 -
.../testdata/tree-construction/tests9.dat | 457 -
.../tree-construction/tests_innerHTML_1.dat | 741 -
.../testdata/tree-construction/tricky01.dat | 261 -
.../testdata/tree-construction/webkit01.dat | 594 -
.../testdata/tree-construction/webkit02.dat | 94 -
html5lib/tests/tokenizertotree.py | 64 -
html5lib/treebuilders/__init__.py | 96 -
html5lib/treebuilders/simpletree.py | 256 -
html5lib/treebuilders/soup.py | 236 -
html5lib/treewalkers/genshistream.py | 70 -
html5lib/treewalkers/simpletree.py | 78 -
html5lib/treewalkers/soup.py | 60 -
html5lib/utils.py | 175 -
{bs4 => lib/bs4}/__init__.py | 157 +-
{bs4 => lib/bs4}/builder/__init__.py | 21 +-
{bs4 => lib/bs4}/builder/_html5lib.py | 103 +-
{bs4 => lib/bs4}/builder/_htmlparser.py | 24 +-
{bs4 => lib/bs4}/builder/_lxml.py | 152 +-
{bs4 => lib/bs4}/dammit.py | 342 +-
lib/bs4/diagnose.py | 204 +
{bs4 => lib/bs4}/element.py | 510 +-
{bs4 => lib/bs4}/testing.py | 55 +
lib/html5lib/__init__.py | 23 +
lib/html5lib/constants.py | 3104 ++
.../html5lib}/filters/__init__.py | 0
{html5lib => lib/html5lib}/filters/_base.py | 2 +
.../filters/alphabeticalattributes.py | 20 +
lib/html5lib/filters/inject_meta_charset.py | 65 +
{html5lib => lib/html5lib}/filters/lint.py | 63 +-
.../html5lib}/filters/optionaltags.py | 15 +-
lib/html5lib/filters/sanitizer.py | 12 +
.../html5lib}/filters/whitespace.py | 21 +-
{html5lib => lib/html5lib}/html5parser.py | 956 +-
lib/html5lib/ihatexml.py | 285 +
{html5lib => lib/html5lib}/inputstream.py | 680 +-
lib/html5lib/sanitizer.py | 271 +
lib/html5lib/serializer/__init__.py | 16 +
.../html5lib}/serializer/htmlserializer.py | 160 +-
{html5lib => lib/html5lib}/tokenizer.py | 899 +-
lib/html5lib/treeadapters/__init__.py | 0
lib/html5lib/treeadapters/sax.py | 44 +
lib/html5lib/treebuilders/__init__.py | 76 +
.../html5lib}/treebuilders/_base.py | 130 +-
.../html5lib}/treebuilders/dom.py | 194 +-
.../html5lib}/treebuilders/etree.py | 265 +-
.../html5lib}/treebuilders/etree_lxml.py | 269 +-
.../html5lib}/treewalkers/__init__.py | 29 +-
.../html5lib}/treewalkers/_base.py | 216 +-
{html5lib => lib/html5lib}/treewalkers/dom.py | 15 +-
.../html5lib}/treewalkers/etree.py | 77 +-
lib/html5lib/treewalkers/genshistream.py | 69 +
.../html5lib}/treewalkers/lxmletree.py | 390 +-
.../html5lib}/treewalkers/pulldom.py | 13 +-
lib/html5lib/trie/__init__.py | 12 +
lib/html5lib/trie/_base.py | 37 +
lib/html5lib/trie/datrie.py | 44 +
lib/html5lib/trie/py.py | 67 +
lib/html5lib/utils.py | 82 +
149 files changed, 7657 insertions(+), 81824 deletions(-)
delete mode 100644 html5lib/__init__.py
delete mode 100644 html5lib/constants.py
delete mode 100644 html5lib/filters/formfiller.py
delete mode 100644 html5lib/filters/inject_meta_charset.py
delete mode 100644 html5lib/filters/sanitizer.py
delete mode 100644 html5lib/ihatexml.py
delete mode 100644 html5lib/sanitizer.py
delete mode 100644 html5lib/serializer/__init__.py
delete mode 100644 html5lib/serializer/xhtmlserializer.py
delete mode 100644 html5lib/tests/__init__.py
delete mode 100644 html5lib/tests/mockParser.py
delete mode 100644 html5lib/tests/runparsertests.py
delete mode 100644 html5lib/tests/runtests.py
delete mode 100644 html5lib/tests/support.py
delete mode 100644 html5lib/tests/test_encoding.py
delete mode 100644 html5lib/tests/test_formfiller.py
delete mode 100644 html5lib/tests/test_parser.py
delete mode 100755 html5lib/tests/test_parser2.py
delete mode 100644 html5lib/tests/test_sanitizer.py
delete mode 100644 html5lib/tests/test_serializer.py
delete mode 100755 html5lib/tests/test_stream.py
delete mode 100644 html5lib/tests/test_tokenizer.py
delete mode 100644 html5lib/tests/test_treewalkers.py
delete mode 100644 html5lib/tests/test_whitespace_filter.py
delete mode 100644 html5lib/tests/testdata/encoding/test-yahoo-jp.dat
delete mode 100644 html5lib/tests/testdata/encoding/tests1.dat
delete mode 100644 html5lib/tests/testdata/encoding/tests2.dat
delete mode 100644 html5lib/tests/testdata/sanitizer/tests1.dat
delete mode 100644 html5lib/tests/testdata/serializer/core.test
delete mode 100644 html5lib/tests/testdata/serializer/injectmeta.test
delete mode 100644 html5lib/tests/testdata/serializer/optionaltags.test
delete mode 100644 html5lib/tests/testdata/serializer/options.test
delete mode 100644 html5lib/tests/testdata/serializer/whitespace.test
delete mode 100644 html5lib/tests/testdata/sniffer/htmlOrFeed.json
delete mode 100644 html5lib/tests/testdata/tokenizer/contentModelFlags.test
delete mode 100644 html5lib/tests/testdata/tokenizer/domjs.test
delete mode 100644 html5lib/tests/testdata/tokenizer/entities.test
delete mode 100644 html5lib/tests/testdata/tokenizer/escapeFlag.test
delete mode 100644 html5lib/tests/testdata/tokenizer/namedEntities.test
delete mode 100644 html5lib/tests/testdata/tokenizer/numericEntities.test
delete mode 100644 html5lib/tests/testdata/tokenizer/pendingSpecChanges.test
delete mode 100644 html5lib/tests/testdata/tokenizer/test1.test
delete mode 100644 html5lib/tests/testdata/tokenizer/test2.test
delete mode 100644 html5lib/tests/testdata/tokenizer/test3.test
delete mode 100644 html5lib/tests/testdata/tokenizer/test4.test
delete mode 100644 html5lib/tests/testdata/tokenizer/unicodeChars.test
delete mode 100644 html5lib/tests/testdata/tokenizer/unicodeCharsProblematic.test
delete mode 100644 html5lib/tests/testdata/tokenizer/xmlViolation.test
delete mode 100644 html5lib/tests/testdata/tree-construction/adoption01.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/adoption02.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/comments01.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/doctype01.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/domjs-unsafe.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/entities01.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/entities02.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/html5test-com.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/inbody01.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/isindex.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/pending-spec-changes-plain-text-unsafe.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/pending-spec-changes.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/plain-text-unsafe.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/scriptdata01.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tables01.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests1.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests10.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests11.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests12.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests14.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests15.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests16.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests17.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests18.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests19.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests2.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests20.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests21.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests22.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests23.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests24.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests25.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests26.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests3.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests4.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests5.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests6.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests7.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests8.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests9.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tests_innerHTML_1.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/tricky01.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/webkit01.dat
delete mode 100644 html5lib/tests/testdata/tree-construction/webkit02.dat
delete mode 100644 html5lib/tests/tokenizertotree.py
delete mode 100755 html5lib/treebuilders/__init__.py
delete mode 100755 html5lib/treebuilders/simpletree.py
delete mode 100644 html5lib/treebuilders/soup.py
delete mode 100644 html5lib/treewalkers/genshistream.py
delete mode 100644 html5lib/treewalkers/simpletree.py
delete mode 100644 html5lib/treewalkers/soup.py
delete mode 100644 html5lib/utils.py
rename {bs4 => lib/bs4}/__init__.py (69%)
rename {bs4 => lib/bs4}/builder/__init__.py (95%)
rename {bs4 => lib/bs4}/builder/_html5lib.py (64%)
rename {bs4 => lib/bs4}/builder/_htmlparser.py (91%)
rename {bs4 => lib/bs4}/builder/_lxml.py (54%)
rename {bs4 => lib/bs4}/dammit.py (75%)
create mode 100644 lib/bs4/diagnose.py
rename {bs4 => lib/bs4}/element.py (73%)
rename {bs4 => lib/bs4}/testing.py (89%)
create mode 100644 lib/html5lib/__init__.py
create mode 100644 lib/html5lib/constants.py
rename {html5lib => lib/html5lib}/filters/__init__.py (100%)
rename {html5lib => lib/html5lib}/filters/_base.py (76%)
create mode 100644 lib/html5lib/filters/alphabeticalattributes.py
create mode 100644 lib/html5lib/filters/inject_meta_charset.py
rename {html5lib => lib/html5lib}/filters/lint.py (53%)
rename {html5lib => lib/html5lib}/filters/optionaltags.py (97%)
create mode 100644 lib/html5lib/filters/sanitizer.py
rename {html5lib => lib/html5lib}/filters/whitespace.py (65%)
rename {html5lib => lib/html5lib}/html5parser.py (80%)
create mode 100644 lib/html5lib/ihatexml.py
rename {html5lib => lib/html5lib}/inputstream.py (66%)
create mode 100644 lib/html5lib/sanitizer.py
create mode 100644 lib/html5lib/serializer/__init__.py
rename {html5lib => lib/html5lib}/serializer/htmlserializer.py (69%)
rename {html5lib => lib/html5lib}/tokenizer.py (77%)
create mode 100644 lib/html5lib/treeadapters/__init__.py
create mode 100644 lib/html5lib/treeadapters/sax.py
create mode 100644 lib/html5lib/treebuilders/__init__.py
rename {html5lib => lib/html5lib}/treebuilders/_base.py (81%)
mode change 100755 => 100644
rename {html5lib => lib/html5lib}/treebuilders/dom.py (58%)
rename {html5lib => lib/html5lib}/treebuilders/etree.py (64%)
mode change 100755 => 100644
rename {html5lib => lib/html5lib}/treebuilders/etree_lxml.py (53%)
rename {html5lib => lib/html5lib}/treewalkers/__init__.py (75%)
rename {html5lib => lib/html5lib}/treewalkers/_base.py (57%)
rename {html5lib => lib/html5lib}/treewalkers/dom.py (76%)
rename {html5lib => lib/html5lib}/treewalkers/etree.py (71%)
create mode 100644 lib/html5lib/treewalkers/genshistream.py
rename {html5lib => lib/html5lib}/treewalkers/lxmletree.py (70%)
rename {html5lib => lib/html5lib}/treewalkers/pulldom.py (85%)
create mode 100644 lib/html5lib/trie/__init__.py
create mode 100644 lib/html5lib/trie/_base.py
create mode 100644 lib/html5lib/trie/datrie.py
create mode 100644 lib/html5lib/trie/py.py
create mode 100644 lib/html5lib/utils.py
diff --git a/Headphones.py b/Headphones.py
index 19cc82dd..fc03dd1e 100755
--- a/Headphones.py
+++ b/Headphones.py
@@ -15,6 +15,14 @@
# along with Headphones. If not, see <http://www.gnu.org/licenses/>.
import os, sys, locale
+from os.path import dirname
+
+# Headphones path
+base_path = dirname(os.path.abspath(__file__))
+
+# Ensure lib added to path
+sys.path.insert(0, os.path.join(base_path, 'lib'))
+
import time
import signal
diff --git a/headphones/importer.py b/headphones/importer.py
index a7481f76..81b14017 100644
--- a/headphones/importer.py
+++ b/headphones/importer.py
@@ -207,6 +207,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
myDB.action("DELETE FROM allalbums WHERE AlbumID=?", [items['AlbumID']])
myDB.action("DELETE FROM tracks WHERE AlbumID=?", [items['AlbumID']])
myDB.action("DELETE FROM alltracks WHERE AlbumID=?", [items['AlbumID']])
+ myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [items['AlbumID']])
logger.info("[%s] Removing all references to release group %s to reflect MusicBrainz" % (artist['artist_name'], items['AlbumID']))
force_repackage = 1
else:
@@ -276,6 +277,7 @@ def addArtisttoDB(artistid, extrasonly=False, forcefull=False):
myDB.action("DELETE from allalbums WHERE ReleaseID=?", [rg['id']])
myDB.action("DELETE from tracks WHERE ReleaseID=?", [rg['id']])
myDB.action("DELETE from alltracks WHERE ReleaseID=?", [rg['id']])
+ myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rg['id']])
# This will be used later to build a hybrid release
fullreleaselist = []
#Search for releases within a release group
diff --git a/headphones/searcher.py b/headphones/searcher.py
index bb7fadda..89214b81 100644
--- a/headphones/searcher.py
+++ b/headphones/searcher.py
@@ -258,7 +258,7 @@ def searchNZB(albumid=None, new=False, losslessOnly=False):
if headphones.NEWZNAB:
- newznab_hosts = [(headphones.NEWZNAB_HOST, headphones.NEWZNAB_APIKEY, headphones.NEWZNAB_ENABLED)]
+ newznab_hosts = []
for newznab_host in headphones.EXTRA_NEWZNABS:
if newznab_host[2] == '1' or newznab_host[2] == 1:
@@ -555,7 +555,7 @@ def searchNZB(albumid=None, new=False, losslessOnly=False):
for result in resultlist:
- if high_size_limit and (result[1] > high_size_limit):
+ if high_size_limit and (int(result[1]) > high_size_limit):
logger.info(result[0] + " is too large for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Maxsize: " + helpers.bytes_to_mb(high_size_limit) + ")")
@@ -565,7 +565,7 @@ def searchNZB(albumid=None, new=False, losslessOnly=False):
continue
- if low_size_limit and (result[1] < low_size_limit):
+ if low_size_limit and (int(result[1]) < low_size_limit):
logger.info(result[0] + " is too small for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Minsize: " + helpers.bytes_to_mb(low_size_limit) + ")")
continue
@@ -1415,7 +1415,7 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
for result in resultlist:
- if high_size_limit and (result[1] > high_size_limit):
+ if high_size_limit and (int(result[1]) > high_size_limit):
logger.info(result[0] + " is too large for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Maxsize: " + helpers.bytes_to_mb(high_size_limit) + ")")
# Add lossless nzbs to the "flac list" which we can use if there are no good lossy matches
@@ -1424,7 +1424,7 @@ def searchTorrent(albumid=None, new=False, losslessOnly=False):
continue
- if low_size_limit and (result[1] < low_size_limit):
+ if low_size_limit and (int(result[1]) < low_size_limit):
logger.info(result[0] + " is too small for this album - not considering it. (Size: " + helpers.bytes_to_mb(result[1]) + ", Minsize: " + helpers.bytes_to_mb(low_size_limit) + ")")
continue
diff --git a/headphones/webserve.py b/headphones/webserve.py
index ca8ad578..37c20141 100644
--- a/headphones/webserve.py
+++ b/headphones/webserve.py
@@ -175,6 +175,7 @@ class WebInterface(object):
myDB.action('DELETE from albums WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']])
myDB.action('DELETE from allalbums WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']])
myDB.action('DELETE from alltracks WHERE ArtistID=? AND AlbumID=?', [ArtistID, album['AlbumID']])
+ myDB.action('DELETE from releases WHERE ReleaseGroupID=?', album['AlbumID'])
raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID)
removeExtras.exposed = True
@@ -203,8 +204,18 @@ class WebInterface(object):
for name in namecheck:
artistname=name['ArtistName']
myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID])
+
+ rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM albums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
+ for rgid in rgids:
+ myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
+
myDB.action('DELETE from albums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from tracks WHERE ArtistID=?', [ArtistID])
+
+ rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM allalbums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
+ for rgid in rgids:
+ myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
+
myDB.action('DELETE from allalbums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from alltracks WHERE ArtistID=?', [ArtistID])
myDB.action('UPDATE have SET Matched=NULL WHERE ArtistName=?', [artistname])
@@ -219,8 +230,18 @@ class WebInterface(object):
for ArtistID in emptyArtistIDs:
logger.info(u"Deleting all traces of artist: " + ArtistID)
myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID])
+
+ rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM albums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
+ for rgid in rgids:
+ myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
+
myDB.action('DELETE from albums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from tracks WHERE ArtistID=?', [ArtistID])
+
+ rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM allalbums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
+ for rgid in rgids:
+ myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
+
myDB.action('DELETE from allalbums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from alltracks WHERE ArtistID=?', [ArtistID])
myDB.action('INSERT OR REPLACE into blacklist VALUES (?)', [ArtistID])
@@ -299,6 +320,7 @@ class WebInterface(object):
myDB.action('DELETE from tracks WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from allalbums WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from alltracks WHERE AlbumID=?', [AlbumID])
+ myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [AlbumID])
if ArtistID:
raise cherrypy.HTTPRedirect("artistPage?ArtistID=%s" % ArtistID)
else:
@@ -552,8 +574,18 @@ class WebInterface(object):
for ArtistID in args:
if action == 'delete':
myDB.action('DELETE from artists WHERE ArtistID=?', [ArtistID])
+
+ rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM albums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
+ for rgid in rgids:
+ myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
+
myDB.action('DELETE from albums WHERE ArtistID=?', [ArtistID])
myDB.action('DELETE from tracks WHERE ArtistID=?', [ArtistID])
+
+ rgids = myDB.select('SELECT DISTINCT ReleaseGroupID FROM allalbums JOIN releases ON AlbumID = ReleaseGroupID WHERE ArtistID=?', [ArtistID])
+ for rgid in rgids:
+ myDB.action('DELETE from releases WHERE ReleaseGroupID=?', [rgid['ReleaseGroupID']])
+
myDB.action('DELETE from allalbums WHERE AlbumID=?', [AlbumID])
myDB.action('DELETE from alltracks WHERE AlbumID=?', [AlbumID])
myDB.action('INSERT OR REPLACE into blacklist VALUES (?)', [ArtistID])
diff --git a/html5lib/__init__.py b/html5lib/__init__.py
deleted file mode 100644
index 16537aad..00000000
--- a/html5lib/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-"""
-HTML parsing library based on the WHATWG "HTML5"
-specification. The parser is designed to be compatible with existing
-HTML found in the wild and implements well-defined error recovery that
-is largely compatible with modern desktop web browsers.
-
-Example usage:
-
-import html5lib
-f = open("my_document.html")
-tree = html5lib.parse(f)
-"""
-__version__ = "0.95-dev"
-from html5parser import HTMLParser, parse, parseFragment
-from treebuilders import getTreeBuilder
-from treewalkers import getTreeWalker
-from serializer import serialize
diff --git a/html5lib/constants.py b/html5lib/constants.py
deleted file mode 100644
index b533018e..00000000
--- a/html5lib/constants.py
+++ /dev/null
@@ -1,3085 +0,0 @@
-import string, gettext
-_ = gettext.gettext
-
-try:
- frozenset
-except NameError:
- # Import from the sets module for python 2.3
- from sets import Set as set
- from sets import ImmutableSet as frozenset
-
-EOF = None
-
-E = {
- "null-character":
- _(u"Null character in input stream, replaced with U+FFFD."),
- "invalid-codepoint":
- _(u"Invalid codepoint in stream."),
- "incorrectly-placed-solidus":
- _(u"Solidus (/) incorrectly placed in tag."),
- "incorrect-cr-newline-entity":
- _(u"Incorrect CR newline entity, replaced with LF."),
- "illegal-windows-1252-entity":
- _(u"Entity used with illegal number (windows-1252 reference)."),
- "cant-convert-numeric-entity":
- _(u"Numeric entity couldn't be converted to character "
- u"(codepoint U+%(charAsInt)08x)."),
- "illegal-codepoint-for-numeric-entity":
- _(u"Numeric entity represents an illegal codepoint: "
- u"U+%(charAsInt)08x."),
- "numeric-entity-without-semicolon":
- _(u"Numeric entity didn't end with ';'."),
- "expected-numeric-entity-but-got-eof":
- _(u"Numeric entity expected. Got end of file instead."),
- "expected-numeric-entity":
- _(u"Numeric entity expected but none found."),
- "named-entity-without-semicolon":
- _(u"Named entity didn't end with ';'."),
- "expected-named-entity":
- _(u"Named entity expected. Got none."),
- "attributes-in-end-tag":
- _(u"End tag contains unexpected attributes."),
- 'self-closing-flag-on-end-tag':
- _(u"End tag contains unexpected self-closing flag."),
- "expected-tag-name-but-got-right-bracket":
- _(u"Expected tag name. Got '>' instead."),
- "expected-tag-name-but-got-question-mark":
- _(u"Expected tag name. Got '?' instead. (HTML doesn't "
- u"support processing instructions.)"),
- "expected-tag-name":
- _(u"Expected tag name. Got something else instead"),
- "expected-closing-tag-but-got-right-bracket":
- _(u"Expected closing tag. Got '>' instead. Ignoring '>'."),
- "expected-closing-tag-but-got-eof":
- _(u"Expected closing tag. Unexpected end of file."),
- "expected-closing-tag-but-got-char":
- _(u"Expected closing tag. Unexpected character '%(data)s' found."),
- "eof-in-tag-name":
- _(u"Unexpected end of file in the tag name."),
- "expected-attribute-name-but-got-eof":
- _(u"Unexpected end of file. Expected attribute name instead."),
- "eof-in-attribute-name":
- _(u"Unexpected end of file in attribute name."),
- "invalid-character-in-attribute-name":
- _(u"Invalid chracter in attribute name"),
- "duplicate-attribute":
- _(u"Dropped duplicate attribute on tag."),
- "expected-end-of-tag-name-but-got-eof":
- _(u"Unexpected end of file. Expected = or end of tag."),
- "expected-attribute-value-but-got-eof":
- _(u"Unexpected end of file. Expected attribute value."),
- "expected-attribute-value-but-got-right-bracket":
- _(u"Expected attribute value. Got '>' instead."),
- 'equals-in-unquoted-attribute-value':
- _(u"Unexpected = in unquoted attribute"),
- 'unexpected-character-in-unquoted-attribute-value':
- _(u"Unexpected character in unquoted attribute"),
- "invalid-character-after-attribute-name":
- _(u"Unexpected character after attribute name."),
- "unexpected-character-after-attribute-value":
- _(u"Unexpected character after attribute value."),
- "eof-in-attribute-value-double-quote":
- _(u"Unexpected end of file in attribute value (\")."),
- "eof-in-attribute-value-single-quote":
- _(u"Unexpected end of file in attribute value (')."),
- "eof-in-attribute-value-no-quotes":
- _(u"Unexpected end of file in attribute value."),
- "unexpected-EOF-after-solidus-in-tag":
- _(u"Unexpected end of file in tag. Expected >"),
- "unexpected-character-after-soldius-in-tag":
- _(u"Unexpected character after / in tag. Expected >"),
- "expected-dashes-or-doctype":
- _(u"Expected '--' or 'DOCTYPE'. Not found."),
- "unexpected-bang-after-double-dash-in-comment":
- _(u"Unexpected ! after -- in comment"),
- "unexpected-space-after-double-dash-in-comment":
- _(u"Unexpected space after -- in comment"),
- "incorrect-comment":
- _(u"Incorrect comment."),
- "eof-in-comment":
- _(u"Unexpected end of file in comment."),
- "eof-in-comment-end-dash":
- _(u"Unexpected end of file in comment (-)"),
- "unexpected-dash-after-double-dash-in-comment":
- _(u"Unexpected '-' after '--' found in comment."),
- "eof-in-comment-double-dash":
- _(u"Unexpected end of file in comment (--)."),
- "eof-in-comment-end-space-state":
- _(u"Unexpected end of file in comment."),
- "eof-in-comment-end-bang-state":
- _(u"Unexpected end of file in comment."),
- "unexpected-char-in-comment":
- _(u"Unexpected character in comment found."),
- "need-space-after-doctype":
- _(u"No space after literal string 'DOCTYPE'."),
- "expected-doctype-name-but-got-right-bracket":
- _(u"Unexpected > character. Expected DOCTYPE name."),
- "expected-doctype-name-but-got-eof":
- _(u"Unexpected end of file. Expected DOCTYPE name."),
- "eof-in-doctype-name":
- _(u"Unexpected end of file in DOCTYPE name."),
- "eof-in-doctype":
- _(u"Unexpected end of file in DOCTYPE."),
- "expected-space-or-right-bracket-in-doctype":
- _(u"Expected space or '>'. Got '%(data)s'"),
- "unexpected-end-of-doctype":
- _(u"Unexpected end of DOCTYPE."),
- "unexpected-char-in-doctype":
- _(u"Unexpected character in DOCTYPE."),
- "eof-in-innerhtml":
- _(u"XXX innerHTML EOF"),
- "unexpected-doctype":
- _(u"Unexpected DOCTYPE. Ignored."),
- "non-html-root":
- _(u"html needs to be the first start tag."),
- "expected-doctype-but-got-eof":
- _(u"Unexpected End of file. Expected DOCTYPE."),
- "unknown-doctype":
- _(u"Erroneous DOCTYPE."),
- "expected-doctype-but-got-chars":
- _(u"Unexpected non-space characters. Expected DOCTYPE."),
- "expected-doctype-but-got-start-tag":
- _(u"Unexpected start tag (%(name)s). Expected DOCTYPE."),
- "expected-doctype-but-got-end-tag":
- _(u"Unexpected end tag (%(name)s). Expected DOCTYPE."),
- "end-tag-after-implied-root":
- _(u"Unexpected end tag (%(name)s) after the (implied) root element."),
- "expected-named-closing-tag-but-got-eof":
- _(u"Unexpected end of file. Expected end tag (%(name)s)."),
- "two-heads-are-not-better-than-one":
- _(u"Unexpected start tag head in existing head. Ignored."),
- "unexpected-end-tag":
- _(u"Unexpected end tag (%(name)s). Ignored."),
- "unexpected-start-tag-out-of-my-head":
- _(u"Unexpected start tag (%(name)s) that can be in head. Moved."),
- "unexpected-start-tag":
- _(u"Unexpected start tag (%(name)s)."),
- "missing-end-tag":
- _(u"Missing end tag (%(name)s)."),
- "missing-end-tags":
- _(u"Missing end tags (%(name)s)."),
- "unexpected-start-tag-implies-end-tag":
- _(u"Unexpected start tag (%(startName)s) "
- u"implies end tag (%(endName)s)."),
- "unexpected-start-tag-treated-as":
- _(u"Unexpected start tag (%(originalName)s). Treated as %(newName)s."),
- "deprecated-tag":
- _(u"Unexpected start tag %(name)s. Don't use it!"),
- "unexpected-start-tag-ignored":
- _(u"Unexpected start tag %(name)s. Ignored."),
- "expected-one-end-tag-but-got-another":
- _(u"Unexpected end tag (%(gotName)s). "
- u"Missing end tag (%(expectedName)s)."),
- "end-tag-too-early":
- _(u"End tag (%(name)s) seen too early. Expected other end tag."),
- "end-tag-too-early-named":
- _(u"Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s)."),
- "end-tag-too-early-ignored":
- _(u"End tag (%(name)s) seen too early. Ignored."),
- "adoption-agency-1.1":
- _(u"End tag (%(name)s) violates step 1, "
- u"paragraph 1 of the adoption agency algorithm."),
- "adoption-agency-1.2":
- _(u"End tag (%(name)s) violates step 1, "
- u"paragraph 2 of the adoption agency algorithm."),
- "adoption-agency-1.3":
- _(u"End tag (%(name)s) violates step 1, "
- u"paragraph 3 of the adoption agency algorithm."),
- "unexpected-end-tag-treated-as":
- _(u"Unexpected end tag (%(originalName)s). Treated as %(newName)s."),
- "no-end-tag":
- _(u"This element (%(name)s) has no end tag."),
- "unexpected-implied-end-tag-in-table":
- _(u"Unexpected implied end tag (%(name)s) in the table phase."),
- "unexpected-implied-end-tag-in-table-body":
- _(u"Unexpected implied end tag (%(name)s) in the table body phase."),
- "unexpected-char-implies-table-voodoo":
- _(u"Unexpected non-space characters in "
- u"table context caused voodoo mode."),
- "unexpected-hidden-input-in-table":
- _(u"Unexpected input with type hidden in table context."),
- "unexpected-form-in-table":
- _(u"Unexpected form in table context."),
- "unexpected-start-tag-implies-table-voodoo":
- _(u"Unexpected start tag (%(name)s) in "
- u"table context caused voodoo mode."),
- "unexpected-end-tag-implies-table-voodoo":
- _(u"Unexpected end tag (%(name)s) in "
- u"table context caused voodoo mode."),
- "unexpected-cell-in-table-body":
- _(u"Unexpected table cell start tag (%(name)s) "
- u"in the table body phase."),
- "unexpected-cell-end-tag":
- _(u"Got table cell end tag (%(name)s) "
- u"while required end tags are missing."),
- "unexpected-end-tag-in-table-body":
- _(u"Unexpected end tag (%(name)s) in the table body phase. Ignored."),
- "unexpected-implied-end-tag-in-table-row":
- _(u"Unexpected implied end tag (%(name)s) in the table row phase."),
- "unexpected-end-tag-in-table-row":
- _(u"Unexpected end tag (%(name)s) in the table row phase. Ignored."),
- "unexpected-select-in-select":
- _(u"Unexpected select start tag in the select phase "
- u"treated as select end tag."),
- "unexpected-input-in-select":
- _(u"Unexpected input start tag in the select phase."),
- "unexpected-start-tag-in-select":
- _(u"Unexpected start tag token (%(name)s in the select phase. "
- u"Ignored."),
- "unexpected-end-tag-in-select":
- _(u"Unexpected end tag (%(name)s) in the select phase. Ignored."),
- "unexpected-table-element-start-tag-in-select-in-table":
- _(u"Unexpected table element start tag (%(name)s) in the select in table phase."),
- "unexpected-table-element-end-tag-in-select-in-table":
- _(u"Unexpected table element end tag (%(name)s) in the select in table phase."),
- "unexpected-char-after-body":
- _(u"Unexpected non-space characters in the after body phase."),
- "unexpected-start-tag-after-body":
- _(u"Unexpected start tag token (%(name)s)"
- u" in the after body phase."),
- "unexpected-end-tag-after-body":
- _(u"Unexpected end tag token (%(name)s)"
- u" in the after body phase."),
- "unexpected-char-in-frameset":
- _(u"Unepxected characters in the frameset phase. Characters ignored."),
- "unexpected-start-tag-in-frameset":
- _(u"Unexpected start tag token (%(name)s)"
- u" in the frameset phase. Ignored."),
- "unexpected-frameset-in-frameset-innerhtml":
- _(u"Unexpected end tag token (frameset) "
- u"in the frameset phase (innerHTML)."),
- "unexpected-end-tag-in-frameset":
- _(u"Unexpected end tag token (%(name)s)"
- u" in the frameset phase. Ignored."),
- "unexpected-char-after-frameset":
- _(u"Unexpected non-space characters in the "
- u"after frameset phase. Ignored."),
- "unexpected-start-tag-after-frameset":
- _(u"Unexpected start tag (%(name)s)"
- u" in the after frameset phase. Ignored."),
- "unexpected-end-tag-after-frameset":
- _(u"Unexpected end tag (%(name)s)"
- u" in the after frameset phase. Ignored."),
- "unexpected-end-tag-after-body-innerhtml":
- _(u"Unexpected end tag after body(innerHtml)"),
- "expected-eof-but-got-char":
- _(u"Unexpected non-space characters. Expected end of file."),
- "expected-eof-but-got-start-tag":
- _(u"Unexpected start tag (%(name)s)"
- u". Expected end of file."),
- "expected-eof-but-got-end-tag":
- _(u"Unexpected end tag (%(name)s)"
- u". Expected end of file."),
- "eof-in-table":
- _(u"Unexpected end of file. Expected table content."),
- "eof-in-select":
- _(u"Unexpected end of file. Expected select content."),
- "eof-in-frameset":
- _(u"Unexpected end of file. Expected frameset content."),
- "eof-in-script-in-script":
- _(u"Unexpected end of file. Expected script content."),
- "eof-in-foreign-lands":
- _(u"Unexpected end of file. Expected foreign content"),
- "non-void-element-with-trailing-solidus":
- _(u"Trailing solidus not allowed on element %(name)s"),
- "unexpected-html-element-in-foreign-content":
- _(u"Element %(name)s not allowed in a non-html context"),
- "unexpected-end-tag-before-html":
- _(u"Unexpected end tag (%(name)s) before html."),
- "XXX-undefined-error":
- (u"Undefined error (this sucks and should be fixed)"),
-}
-
-namespaces = {
- "html":"http://www.w3.org/1999/xhtml",
- "mathml":"http://www.w3.org/1998/Math/MathML",
- "svg":"http://www.w3.org/2000/svg",
- "xlink":"http://www.w3.org/1999/xlink",
- "xml":"http://www.w3.org/XML/1998/namespace",
- "xmlns":"http://www.w3.org/2000/xmlns/"
-}
-
-scopingElements = frozenset((
- (namespaces["html"], "applet"),
- (namespaces["html"], "caption"),
- (namespaces["html"], "html"),
- (namespaces["html"], "marquee"),
- (namespaces["html"], "object"),
- (namespaces["html"], "table"),
- (namespaces["html"], "td"),
- (namespaces["html"], "th"),
- (namespaces["mathml"], "mi"),
- (namespaces["mathml"], "mo"),
- (namespaces["mathml"], "mn"),
- (namespaces["mathml"], "ms"),
- (namespaces["mathml"], "mtext"),
- (namespaces["mathml"], "annotation-xml"),
- (namespaces["svg"], "foreignObject"),
- (namespaces["svg"], "desc"),
- (namespaces["svg"], "title"),
-))
-
-formattingElements = frozenset((
- (namespaces["html"], "a"),
- (namespaces["html"], "b"),
- (namespaces["html"], "big"),
- (namespaces["html"], "code"),
- (namespaces["html"], "em"),
- (namespaces["html"], "font"),
- (namespaces["html"], "i"),
- (namespaces["html"], "nobr"),
- (namespaces["html"], "s"),
- (namespaces["html"], "small"),
- (namespaces["html"], "strike"),
- (namespaces["html"], "strong"),
- (namespaces["html"], "tt"),
- (namespaces["html"], "u")
-))
-
-specialElements = frozenset((
- (namespaces["html"], "address"),
- (namespaces["html"], "applet"),
- (namespaces["html"], "area"),
- (namespaces["html"], "article"),
- (namespaces["html"], "aside"),
- (namespaces["html"], "base"),
- (namespaces["html"], "basefont"),
- (namespaces["html"], "bgsound"),
- (namespaces["html"], "blockquote"),
- (namespaces["html"], "body"),
- (namespaces["html"], "br"),
- (namespaces["html"], "button"),
- (namespaces["html"], "caption"),
- (namespaces["html"], "center"),
- (namespaces["html"], "col"),
- (namespaces["html"], "colgroup"),
- (namespaces["html"], "command"),
- (namespaces["html"], "dd"),
- (namespaces["html"], "details"),
- (namespaces["html"], "dir"),
- (namespaces["html"], "div"),
- (namespaces["html"], "dl"),
- (namespaces["html"], "dt"),
- (namespaces["html"], "embed"),
- (namespaces["html"], "fieldset"),
- (namespaces["html"], "figure"),
- (namespaces["html"], "footer"),
- (namespaces["html"], "form"),
- (namespaces["html"], "frame"),
- (namespaces["html"], "frameset"),
- (namespaces["html"], "h1"),
- (namespaces["html"], "h2"),
- (namespaces["html"], "h3"),
- (namespaces["html"], "h4"),
- (namespaces["html"], "h5"),
- (namespaces["html"], "h6"),
- (namespaces["html"], "head"),
- (namespaces["html"], "header"),
- (namespaces["html"], "hr"),
- (namespaces["html"], "html"),
- (namespaces["html"], "iframe"),
- # Note that image is commented out in the spec as "this isn't an
- # element that can end up on the stack, so it doesn't matter,"
- (namespaces["html"], "image"),
- (namespaces["html"], "img"),
- (namespaces["html"], "input"),
- (namespaces["html"], "isindex"),
- (namespaces["html"], "li"),
- (namespaces["html"], "link"),
- (namespaces["html"], "listing"),
- (namespaces["html"], "marquee"),
- (namespaces["html"], "menu"),
- (namespaces["html"], "meta"),
- (namespaces["html"], "nav"),
- (namespaces["html"], "noembed"),
- (namespaces["html"], "noframes"),
- (namespaces["html"], "noscript"),
- (namespaces["html"], "object"),
- (namespaces["html"], "ol"),
- (namespaces["html"], "p"),
- (namespaces["html"], "param"),
- (namespaces["html"], "plaintext"),
- (namespaces["html"], "pre"),
- (namespaces["html"], "script"),
- (namespaces["html"], "section"),
- (namespaces["html"], "select"),
- (namespaces["html"], "style"),
- (namespaces["html"], "table"),
- (namespaces["html"], "tbody"),
- (namespaces["html"], "td"),
- (namespaces["html"], "textarea"),
- (namespaces["html"], "tfoot"),
- (namespaces["html"], "th"),
- (namespaces["html"], "thead"),
- (namespaces["html"], "title"),
- (namespaces["html"], "tr"),
- (namespaces["html"], "ul"),
- (namespaces["html"], "wbr"),
- (namespaces["html"], "xmp"),
- (namespaces["svg"], "foreignObject")
-))
-
-htmlIntegrationPointElements = frozenset((
- (namespaces["mathml"], "annotaion-xml"),
- (namespaces["svg"], "foreignObject"),
- (namespaces["svg"], "desc"),
- (namespaces["svg"], "title")
-))
-
-mathmlTextIntegrationPointElements = frozenset((
- (namespaces["mathml"], "mi"),
- (namespaces["mathml"], "mo"),
- (namespaces["mathml"], "mn"),
- (namespaces["mathml"], "ms"),
- (namespaces["mathml"], "mtext")
-))
-
-spaceCharacters = frozenset((
- u"\t",
- u"\n",
- u"\u000C",
- u" ",
- u"\r"
-))
-
-tableInsertModeElements = frozenset((
- "table",
- "tbody",
- "tfoot",
- "thead",
- "tr"
-))
-
-asciiLowercase = frozenset(string.ascii_lowercase)
-asciiUppercase = frozenset(string.ascii_uppercase)
-asciiLetters = frozenset(string.ascii_letters)
-digits = frozenset(string.digits)
-hexDigits = frozenset(string.hexdigits)
-
-asciiUpper2Lower = dict([(ord(c),ord(c.lower()))
- for c in string.ascii_uppercase])
-
-# Heading elements need to be ordered
-headingElements = (
- "h1",
- "h2",
- "h3",
- "h4",
- "h5",
- "h6"
-)
-
-voidElements = frozenset((
- "base",
- "command",
- "event-source",
- "link",
- "meta",
- "hr",
- "br",
- "img",
- "embed",
- "param",
- "area",
- "col",
- "input",
- "source",
- "track"
-))
-
-cdataElements = frozenset(('title', 'textarea'))
-
-rcdataElements = frozenset((
- 'style',
- 'script',
- 'xmp',
- 'iframe',
- 'noembed',
- 'noframes',
- 'noscript'
-))
-
-booleanAttributes = {
- "": frozenset(("irrelevant",)),
- "style": frozenset(("scoped",)),
- "img": frozenset(("ismap",)),
- "audio": frozenset(("autoplay","controls")),
- "video": frozenset(("autoplay","controls")),
- "script": frozenset(("defer", "async")),
- "details": frozenset(("open",)),
- "datagrid": frozenset(("multiple", "disabled")),
- "command": frozenset(("hidden", "disabled", "checked", "default")),
- "hr": frozenset(("noshade")),
- "menu": frozenset(("autosubmit",)),
- "fieldset": frozenset(("disabled", "readonly")),
- "option": frozenset(("disabled", "readonly", "selected")),
- "optgroup": frozenset(("disabled", "readonly")),
- "button": frozenset(("disabled", "autofocus")),
- "input": frozenset(("disabled", "readonly", "required", "autofocus", "checked", "ismap")),
- "select": frozenset(("disabled", "readonly", "autofocus", "multiple")),
- "output": frozenset(("disabled", "readonly")),
-}
-
-# entitiesWindows1252 has to be _ordered_ and needs to have an index. It
-# therefore can't be a frozenset.
-entitiesWindows1252 = (
- 8364, # 0x80 0x20AC EURO SIGN
- 65533, # 0x81 UNDEFINED
- 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK
- 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK
- 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK
- 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS
- 8224, # 0x86 0x2020 DAGGER
- 8225, # 0x87 0x2021 DOUBLE DAGGER
- 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT
- 8240, # 0x89 0x2030 PER MILLE SIGN
- 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON
- 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
- 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE
- 65533, # 0x8D UNDEFINED
- 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON
- 65533, # 0x8F UNDEFINED
- 65533, # 0x90 UNDEFINED
- 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK
- 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK
- 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK
- 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK
- 8226, # 0x95 0x2022 BULLET
- 8211, # 0x96 0x2013 EN DASH
- 8212, # 0x97 0x2014 EM DASH
- 732, # 0x98 0x02DC SMALL TILDE
- 8482, # 0x99 0x2122 TRADE MARK SIGN
- 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON
- 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
- 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE
- 65533, # 0x9D UNDEFINED
- 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON
- 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS
-)
-
-xmlEntities = frozenset(('lt;', 'gt;', 'amp;', 'apos;', 'quot;'))
-
-entities = {
- "AElig": u"\xc6",
- "AElig;": u"\xc6",
- "AMP": u"&",
- "AMP;": u"&",
- "Aacute": u"\xc1",
- "Aacute;": u"\xc1",
- "Abreve;": u"\u0102",
- "Acirc": u"\xc2",
- "Acirc;": u"\xc2",
- "Acy;": u"\u0410",
- "Afr;": u"\U0001d504",
- "Agrave": u"\xc0",
- "Agrave;": u"\xc0",
- "Alpha;": u"\u0391",
- "Amacr;": u"\u0100",
- "And;": u"\u2a53",
- "Aogon;": u"\u0104",
- "Aopf;": u"\U0001d538",
- "ApplyFunction;": u"\u2061",
- "Aring": u"\xc5",
- "Aring;": u"\xc5",
- "Ascr;": u"\U0001d49c",
- "Assign;": u"\u2254",
- "Atilde": u"\xc3",
- "Atilde;": u"\xc3",
- "Auml": u"\xc4",
- "Auml;": u"\xc4",
- "Backslash;": u"\u2216",
- "Barv;": u"\u2ae7",
- "Barwed;": u"\u2306",
- "Bcy;": u"\u0411",
- "Because;": u"\u2235",
- "Bernoullis;": u"\u212c",
- "Beta;": u"\u0392",
- "Bfr;": u"\U0001d505",
- "Bopf;": u"\U0001d539",
- "Breve;": u"\u02d8",
- "Bscr;": u"\u212c",
- "Bumpeq;": u"\u224e",
- "CHcy;": u"\u0427",
- "COPY": u"\xa9",
- "COPY;": u"\xa9",
- "Cacute;": u"\u0106",
- "Cap;": u"\u22d2",
- "CapitalDifferentialD;": u"\u2145",
- "Cayleys;": u"\u212d",
- "Ccaron;": u"\u010c",
- "Ccedil": u"\xc7",
- "Ccedil;": u"\xc7",
- "Ccirc;": u"\u0108",
- "Cconint;": u"\u2230",
- "Cdot;": u"\u010a",
- "Cedilla;": u"\xb8",
- "CenterDot;": u"\xb7",
- "Cfr;": u"\u212d",
- "Chi;": u"\u03a7",
- "CircleDot;": u"\u2299",
- "CircleMinus;": u"\u2296",
- "CirclePlus;": u"\u2295",
- "CircleTimes;": u"\u2297",
- "ClockwiseContourIntegral;": u"\u2232",
- "CloseCurlyDoubleQuote;": u"\u201d",
- "CloseCurlyQuote;": u"\u2019",
- "Colon;": u"\u2237",
- "Colone;": u"\u2a74",
- "Congruent;": u"\u2261",
- "Conint;": u"\u222f",
- "ContourIntegral;": u"\u222e",
- "Copf;": u"\u2102",
- "Coproduct;": u"\u2210",
- "CounterClockwiseContourIntegral;": u"\u2233",
- "Cross;": u"\u2a2f",
- "Cscr;": u"\U0001d49e",
- "Cup;": u"\u22d3",
- "CupCap;": u"\u224d",
- "DD;": u"\u2145",
- "DDotrahd;": u"\u2911",
- "DJcy;": u"\u0402",
- "DScy;": u"\u0405",
- "DZcy;": u"\u040f",
- "Dagger;": u"\u2021",
- "Darr;": u"\u21a1",
- "Dashv;": u"\u2ae4",
- "Dcaron;": u"\u010e",
- "Dcy;": u"\u0414",
- "Del;": u"\u2207",
- "Delta;": u"\u0394",
- "Dfr;": u"\U0001d507",
- "DiacriticalAcute;": u"\xb4",
- "DiacriticalDot;": u"\u02d9",
- "DiacriticalDoubleAcute;": u"\u02dd",
- "DiacriticalGrave;": u"`",
- "DiacriticalTilde;": u"\u02dc",
- "Diamond;": u"\u22c4",
- "DifferentialD;": u"\u2146",
- "Dopf;": u"\U0001d53b",
- "Dot;": u"\xa8",
- "DotDot;": u"\u20dc",
- "DotEqual;": u"\u2250",
- "DoubleContourIntegral;": u"\u222f",
- "DoubleDot;": u"\xa8",
- "DoubleDownArrow;": u"\u21d3",
- "DoubleLeftArrow;": u"\u21d0",
- "DoubleLeftRightArrow;": u"\u21d4",
- "DoubleLeftTee;": u"\u2ae4",
- "DoubleLongLeftArrow;": u"\u27f8",
- "DoubleLongLeftRightArrow;": u"\u27fa",
- "DoubleLongRightArrow;": u"\u27f9",
- "DoubleRightArrow;": u"\u21d2",
- "DoubleRightTee;": u"\u22a8",
- "DoubleUpArrow;": u"\u21d1",
- "DoubleUpDownArrow;": u"\u21d5",
- "DoubleVerticalBar;": u"\u2225",
- "DownArrow;": u"\u2193",
- "DownArrowBar;": u"\u2913",
- "DownArrowUpArrow;": u"\u21f5",
- "DownBreve;": u"\u0311",
- "DownLeftRightVector;": u"\u2950",
- "DownLeftTeeVector;": u"\u295e",
- "DownLeftVector;": u"\u21bd",
- "DownLeftVectorBar;": u"\u2956",
- "DownRightTeeVector;": u"\u295f",
- "DownRightVector;": u"\u21c1",
- "DownRightVectorBar;": u"\u2957",
- "DownTee;": u"\u22a4",
- "DownTeeArrow;": u"\u21a7",
- "Downarrow;": u"\u21d3",
- "Dscr;": u"\U0001d49f",
- "Dstrok;": u"\u0110",
- "ENG;": u"\u014a",
- "ETH": u"\xd0",
- "ETH;": u"\xd0",
- "Eacute": u"\xc9",
- "Eacute;": u"\xc9",
- "Ecaron;": u"\u011a",
- "Ecirc": u"\xca",
- "Ecirc;": u"\xca",
- "Ecy;": u"\u042d",
- "Edot;": u"\u0116",
- "Efr;": u"\U0001d508",
- "Egrave": u"\xc8",
- "Egrave;": u"\xc8",
- "Element;": u"\u2208",
- "Emacr;": u"\u0112",
- "EmptySmallSquare;": u"\u25fb",
- "EmptyVerySmallSquare;": u"\u25ab",
- "Eogon;": u"\u0118",
- "Eopf;": u"\U0001d53c",
- "Epsilon;": u"\u0395",
- "Equal;": u"\u2a75",
- "EqualTilde;": u"\u2242",
- "Equilibrium;": u"\u21cc",
- "Escr;": u"\u2130",
- "Esim;": u"\u2a73",
- "Eta;": u"\u0397",
- "Euml": u"\xcb",
- "Euml;": u"\xcb",
- "Exists;": u"\u2203",
- "ExponentialE;": u"\u2147",
- "Fcy;": u"\u0424",
- "Ffr;": u"\U0001d509",
- "FilledSmallSquare;": u"\u25fc",
- "FilledVerySmallSquare;": u"\u25aa",
- "Fopf;": u"\U0001d53d",
- "ForAll;": u"\u2200",
- "Fouriertrf;": u"\u2131",
- "Fscr;": u"\u2131",
- "GJcy;": u"\u0403",
- "GT": u">",
- "GT;": u">",
- "Gamma;": u"\u0393",
- "Gammad;": u"\u03dc",
- "Gbreve;": u"\u011e",
- "Gcedil;": u"\u0122",
- "Gcirc;": u"\u011c",
- "Gcy;": u"\u0413",
- "Gdot;": u"\u0120",
- "Gfr;": u"\U0001d50a",
- "Gg;": u"\u22d9",
- "Gopf;": u"\U0001d53e",
- "GreaterEqual;": u"\u2265",
- "GreaterEqualLess;": u"\u22db",
- "GreaterFullEqual;": u"\u2267",
- "GreaterGreater;": u"\u2aa2",
- "GreaterLess;": u"\u2277",
- "GreaterSlantEqual;": u"\u2a7e",
- "GreaterTilde;": u"\u2273",
- "Gscr;": u"\U0001d4a2",
- "Gt;": u"\u226b",
- "HARDcy;": u"\u042a",
- "Hacek;": u"\u02c7",
- "Hat;": u"^",
- "Hcirc;": u"\u0124",
- "Hfr;": u"\u210c",
- "HilbertSpace;": u"\u210b",
- "Hopf;": u"\u210d",
- "HorizontalLine;": u"\u2500",
- "Hscr;": u"\u210b",
- "Hstrok;": u"\u0126",
- "HumpDownHump;": u"\u224e",
- "HumpEqual;": u"\u224f",
- "IEcy;": u"\u0415",
- "IJlig;": u"\u0132",
- "IOcy;": u"\u0401",
- "Iacute": u"\xcd",
- "Iacute;": u"\xcd",
- "Icirc": u"\xce",
- "Icirc;": u"\xce",
- "Icy;": u"\u0418",
- "Idot;": u"\u0130",
- "Ifr;": u"\u2111",
- "Igrave": u"\xcc",
- "Igrave;": u"\xcc",
- "Im;": u"\u2111",
- "Imacr;": u"\u012a",
- "ImaginaryI;": u"\u2148",
- "Implies;": u"\u21d2",
- "Int;": u"\u222c",
- "Integral;": u"\u222b",
- "Intersection;": u"\u22c2",
- "InvisibleComma;": u"\u2063",
- "InvisibleTimes;": u"\u2062",
- "Iogon;": u"\u012e",
- "Iopf;": u"\U0001d540",
- "Iota;": u"\u0399",
- "Iscr;": u"\u2110",
- "Itilde;": u"\u0128",
- "Iukcy;": u"\u0406",
- "Iuml": u"\xcf",
- "Iuml;": u"\xcf",
- "Jcirc;": u"\u0134",
- "Jcy;": u"\u0419",
- "Jfr;": u"\U0001d50d",
- "Jopf;": u"\U0001d541",
- "Jscr;": u"\U0001d4a5",
- "Jsercy;": u"\u0408",
- "Jukcy;": u"\u0404",
- "KHcy;": u"\u0425",
- "KJcy;": u"\u040c",
- "Kappa;": u"\u039a",
- "Kcedil;": u"\u0136",
- "Kcy;": u"\u041a",
- "Kfr;": u"\U0001d50e",
- "Kopf;": u"\U0001d542",
- "Kscr;": u"\U0001d4a6",
- "LJcy;": u"\u0409",
- "LT": u"<",
- "LT;": u"<",
- "Lacute;": u"\u0139",
- "Lambda;": u"\u039b",
- "Lang;": u"\u27ea",
- "Laplacetrf;": u"\u2112",
- "Larr;": u"\u219e",
- "Lcaron;": u"\u013d",
- "Lcedil;": u"\u013b",
- "Lcy;": u"\u041b",
- "LeftAngleBracket;": u"\u27e8",
- "LeftArrow;": u"\u2190",
- "LeftArrowBar;": u"\u21e4",
- "LeftArrowRightArrow;": u"\u21c6",
- "LeftCeiling;": u"\u2308",
- "LeftDoubleBracket;": u"\u27e6",
- "LeftDownTeeVector;": u"\u2961",
- "LeftDownVector;": u"\u21c3",
- "LeftDownVectorBar;": u"\u2959",
- "LeftFloor;": u"\u230a",
- "LeftRightArrow;": u"\u2194",
- "LeftRightVector;": u"\u294e",
- "LeftTee;": u"\u22a3",
- "LeftTeeArrow;": u"\u21a4",
- "LeftTeeVector;": u"\u295a",
- "LeftTriangle;": u"\u22b2",
- "LeftTriangleBar;": u"\u29cf",
- "LeftTriangleEqual;": u"\u22b4",
- "LeftUpDownVector;": u"\u2951",
- "LeftUpTeeVector;": u"\u2960",
- "LeftUpVector;": u"\u21bf",
- "LeftUpVectorBar;": u"\u2958",
- "LeftVector;": u"\u21bc",
- "LeftVectorBar;": u"\u2952",
- "Leftarrow;": u"\u21d0",
- "Leftrightarrow;": u"\u21d4",
- "LessEqualGreater;": u"\u22da",
- "LessFullEqual;": u"\u2266",
- "LessGreater;": u"\u2276",
- "LessLess;": u"\u2aa1",
- "LessSlantEqual;": u"\u2a7d",
- "LessTilde;": u"\u2272",
- "Lfr;": u"\U0001d50f",
- "Ll;": u"\u22d8",
- "Lleftarrow;": u"\u21da",
- "Lmidot;": u"\u013f",
- "LongLeftArrow;": u"\u27f5",
- "LongLeftRightArrow;": u"\u27f7",
- "LongRightArrow;": u"\u27f6",
- "Longleftarrow;": u"\u27f8",
- "Longleftrightarrow;": u"\u27fa",
- "Longrightarrow;": u"\u27f9",
- "Lopf;": u"\U0001d543",
- "LowerLeftArrow;": u"\u2199",
- "LowerRightArrow;": u"\u2198",
- "Lscr;": u"\u2112",
- "Lsh;": u"\u21b0",
- "Lstrok;": u"\u0141",
- "Lt;": u"\u226a",
- "Map;": u"\u2905",
- "Mcy;": u"\u041c",
- "MediumSpace;": u"\u205f",
- "Mellintrf;": u"\u2133",
- "Mfr;": u"\U0001d510",
- "MinusPlus;": u"\u2213",
- "Mopf;": u"\U0001d544",
- "Mscr;": u"\u2133",
- "Mu;": u"\u039c",
- "NJcy;": u"\u040a",
- "Nacute;": u"\u0143",
- "Ncaron;": u"\u0147",
- "Ncedil;": u"\u0145",
- "Ncy;": u"\u041d",
- "NegativeMediumSpace;": u"\u200b",
- "NegativeThickSpace;": u"\u200b",
- "NegativeThinSpace;": u"\u200b",
- "NegativeVeryThinSpace;": u"\u200b",
- "NestedGreaterGreater;": u"\u226b",
- "NestedLessLess;": u"\u226a",
- "NewLine;": u"\n",
- "Nfr;": u"\U0001d511",
- "NoBreak;": u"\u2060",
- "NonBreakingSpace;": u"\xa0",
- "Nopf;": u"\u2115",
- "Not;": u"\u2aec",
- "NotCongruent;": u"\u2262",
- "NotCupCap;": u"\u226d",
- "NotDoubleVerticalBar;": u"\u2226",
- "NotElement;": u"\u2209",
- "NotEqual;": u"\u2260",
- "NotEqualTilde;": u"\u2242\u0338",
- "NotExists;": u"\u2204",
- "NotGreater;": u"\u226f",
- "NotGreaterEqual;": u"\u2271",
- "NotGreaterFullEqual;": u"\u2267\u0338",
- "NotGreaterGreater;": u"\u226b\u0338",
- "NotGreaterLess;": u"\u2279",
- "NotGreaterSlantEqual;": u"\u2a7e\u0338",
- "NotGreaterTilde;": u"\u2275",
- "NotHumpDownHump;": u"\u224e\u0338",
- "NotHumpEqual;": u"\u224f\u0338",
- "NotLeftTriangle;": u"\u22ea",
- "NotLeftTriangleBar;": u"\u29cf\u0338",
- "NotLeftTriangleEqual;": u"\u22ec",
- "NotLess;": u"\u226e",
- "NotLessEqual;": u"\u2270",
- "NotLessGreater;": u"\u2278",
- "NotLessLess;": u"\u226a\u0338",
- "NotLessSlantEqual;": u"\u2a7d\u0338",
- "NotLessTilde;": u"\u2274",
- "NotNestedGreaterGreater;": u"\u2aa2\u0338",
- "NotNestedLessLess;": u"\u2aa1\u0338",
- "NotPrecedes;": u"\u2280",
- "NotPrecedesEqual;": u"\u2aaf\u0338",
- "NotPrecedesSlantEqual;": u"\u22e0",
- "NotReverseElement;": u"\u220c",
- "NotRightTriangle;": u"\u22eb",
- "NotRightTriangleBar;": u"\u29d0\u0338",
- "NotRightTriangleEqual;": u"\u22ed",
- "NotSquareSubset;": u"\u228f\u0338",
- "NotSquareSubsetEqual;": u"\u22e2",
- "NotSquareSuperset;": u"\u2290\u0338",
- "NotSquareSupersetEqual;": u"\u22e3",
- "NotSubset;": u"\u2282\u20d2",
- "NotSubsetEqual;": u"\u2288",
- "NotSucceeds;": u"\u2281",
- "NotSucceedsEqual;": u"\u2ab0\u0338",
- "NotSucceedsSlantEqual;": u"\u22e1",
- "NotSucceedsTilde;": u"\u227f\u0338",
- "NotSuperset;": u"\u2283\u20d2",
- "NotSupersetEqual;": u"\u2289",
- "NotTilde;": u"\u2241",
- "NotTildeEqual;": u"\u2244",
- "NotTildeFullEqual;": u"\u2247",
- "NotTildeTilde;": u"\u2249",
- "NotVerticalBar;": u"\u2224",
- "Nscr;": u"\U0001d4a9",
- "Ntilde": u"\xd1",
- "Ntilde;": u"\xd1",
- "Nu;": u"\u039d",
- "OElig;": u"\u0152",
- "Oacute": u"\xd3",
- "Oacute;": u"\xd3",
- "Ocirc": u"\xd4",
- "Ocirc;": u"\xd4",
- "Ocy;": u"\u041e",
- "Odblac;": u"\u0150",
- "Ofr;": u"\U0001d512",
- "Ograve": u"\xd2",
- "Ograve;": u"\xd2",
- "Omacr;": u"\u014c",
- "Omega;": u"\u03a9",
- "Omicron;": u"\u039f",
- "Oopf;": u"\U0001d546",
- "OpenCurlyDoubleQuote;": u"\u201c",
- "OpenCurlyQuote;": u"\u2018",
- "Or;": u"\u2a54",
- "Oscr;": u"\U0001d4aa",
- "Oslash": u"\xd8",
- "Oslash;": u"\xd8",
- "Otilde": u"\xd5",
- "Otilde;": u"\xd5",
- "Otimes;": u"\u2a37",
- "Ouml": u"\xd6",
- "Ouml;": u"\xd6",
- "OverBar;": u"\u203e",
- "OverBrace;": u"\u23de",
- "OverBracket;": u"\u23b4",
- "OverParenthesis;": u"\u23dc",
- "PartialD;": u"\u2202",
- "Pcy;": u"\u041f",
- "Pfr;": u"\U0001d513",
- "Phi;": u"\u03a6",
- "Pi;": u"\u03a0",
- "PlusMinus;": u"\xb1",
- "Poincareplane;": u"\u210c",
- "Popf;": u"\u2119",
- "Pr;": u"\u2abb",
- "Precedes;": u"\u227a",
- "PrecedesEqual;": u"\u2aaf",
- "PrecedesSlantEqual;": u"\u227c",
- "PrecedesTilde;": u"\u227e",
- "Prime;": u"\u2033",
- "Product;": u"\u220f",
- "Proportion;": u"\u2237",
- "Proportional;": u"\u221d",
- "Pscr;": u"\U0001d4ab",
- "Psi;": u"\u03a8",
- "QUOT": u"\"",
- "QUOT;": u"\"",
- "Qfr;": u"\U0001d514",
- "Qopf;": u"\u211a",
- "Qscr;": u"\U0001d4ac",
- "RBarr;": u"\u2910",
- "REG": u"\xae",
- "REG;": u"\xae",
- "Racute;": u"\u0154",
- "Rang;": u"\u27eb",
- "Rarr;": u"\u21a0",
- "Rarrtl;": u"\u2916",
- "Rcaron;": u"\u0158",
- "Rcedil;": u"\u0156",
- "Rcy;": u"\u0420",
- "Re;": u"\u211c",
- "ReverseElement;": u"\u220b",
- "ReverseEquilibrium;": u"\u21cb",
- "ReverseUpEquilibrium;": u"\u296f",
- "Rfr;": u"\u211c",
- "Rho;": u"\u03a1",
- "RightAngleBracket;": u"\u27e9",
- "RightArrow;": u"\u2192",
- "RightArrowBar;": u"\u21e5",
- "RightArrowLeftArrow;": u"\u21c4",
- "RightCeiling;": u"\u2309",
- "RightDoubleBracket;": u"\u27e7",
- "RightDownTeeVector;": u"\u295d",
- "RightDownVector;": u"\u21c2",
- "RightDownVectorBar;": u"\u2955",
- "RightFloor;": u"\u230b",
- "RightTee;": u"\u22a2",
- "RightTeeArrow;": u"\u21a6",
- "RightTeeVector;": u"\u295b",
- "RightTriangle;": u"\u22b3",
- "RightTriangleBar;": u"\u29d0",
- "RightTriangleEqual;": u"\u22b5",
- "RightUpDownVector;": u"\u294f",
- "RightUpTeeVector;": u"\u295c",
- "RightUpVector;": u"\u21be",
- "RightUpVectorBar;": u"\u2954",
- "RightVector;": u"\u21c0",
- "RightVectorBar;": u"\u2953",
- "Rightarrow;": u"\u21d2",
- "Ropf;": u"\u211d",
- "RoundImplies;": u"\u2970",
- "Rrightarrow;": u"\u21db",
- "Rscr;": u"\u211b",
- "Rsh;": u"\u21b1",
- "RuleDelayed;": u"\u29f4",
- "SHCHcy;": u"\u0429",
- "SHcy;": u"\u0428",
- "SOFTcy;": u"\u042c",
- "Sacute;": u"\u015a",
- "Sc;": u"\u2abc",
- "Scaron;": u"\u0160",
- "Scedil;": u"\u015e",
- "Scirc;": u"\u015c",
- "Scy;": u"\u0421",
- "Sfr;": u"\U0001d516",
- "ShortDownArrow;": u"\u2193",
- "ShortLeftArrow;": u"\u2190",
- "ShortRightArrow;": u"\u2192",
- "ShortUpArrow;": u"\u2191",
- "Sigma;": u"\u03a3",
- "SmallCircle;": u"\u2218",
- "Sopf;": u"\U0001d54a",
- "Sqrt;": u"\u221a",
- "Square;": u"\u25a1",
- "SquareIntersection;": u"\u2293",
- "SquareSubset;": u"\u228f",
- "SquareSubsetEqual;": u"\u2291",
- "SquareSuperset;": u"\u2290",
- "SquareSupersetEqual;": u"\u2292",
- "SquareUnion;": u"\u2294",
- "Sscr;": u"\U0001d4ae",
- "Star;": u"\u22c6",
- "Sub;": u"\u22d0",
- "Subset;": u"\u22d0",
- "SubsetEqual;": u"\u2286",
- "Succeeds;": u"\u227b",
- "SucceedsEqual;": u"\u2ab0",
- "SucceedsSlantEqual;": u"\u227d",
- "SucceedsTilde;": u"\u227f",
- "SuchThat;": u"\u220b",
- "Sum;": u"\u2211",
- "Sup;": u"\u22d1",
- "Superset;": u"\u2283",
- "SupersetEqual;": u"\u2287",
- "Supset;": u"\u22d1",
- "THORN": u"\xde",
- "THORN;": u"\xde",
- "TRADE;": u"\u2122",
- "TSHcy;": u"\u040b",
- "TScy;": u"\u0426",
- "Tab;": u"\t",
- "Tau;": u"\u03a4",
- "Tcaron;": u"\u0164",
- "Tcedil;": u"\u0162",
- "Tcy;": u"\u0422",
- "Tfr;": u"\U0001d517",
- "Therefore;": u"\u2234",
- "Theta;": u"\u0398",
- "ThickSpace;": u"\u205f\u200a",
- "ThinSpace;": u"\u2009",
- "Tilde;": u"\u223c",
- "TildeEqual;": u"\u2243",
- "TildeFullEqual;": u"\u2245",
- "TildeTilde;": u"\u2248",
- "Topf;": u"\U0001d54b",
- "TripleDot;": u"\u20db",
- "Tscr;": u"\U0001d4af",
- "Tstrok;": u"\u0166",
- "Uacute": u"\xda",
- "Uacute;": u"\xda",
- "Uarr;": u"\u219f",
- "Uarrocir;": u"\u2949",
- "Ubrcy;": u"\u040e",
- "Ubreve;": u"\u016c",
- "Ucirc": u"\xdb",
- "Ucirc;": u"\xdb",
- "Ucy;": u"\u0423",
- "Udblac;": u"\u0170",
- "Ufr;": u"\U0001d518",
- "Ugrave": u"\xd9",
- "Ugrave;": u"\xd9",
- "Umacr;": u"\u016a",
- "UnderBar;": u"_",
- "UnderBrace;": u"\u23df",
- "UnderBracket;": u"\u23b5",
- "UnderParenthesis;": u"\u23dd",
- "Union;": u"\u22c3",
- "UnionPlus;": u"\u228e",
- "Uogon;": u"\u0172",
- "Uopf;": u"\U0001d54c",
- "UpArrow;": u"\u2191",
- "UpArrowBar;": u"\u2912",
- "UpArrowDownArrow;": u"\u21c5",
- "UpDownArrow;": u"\u2195",
- "UpEquilibrium;": u"\u296e",
- "UpTee;": u"\u22a5",
- "UpTeeArrow;": u"\u21a5",
- "Uparrow;": u"\u21d1",
- "Updownarrow;": u"\u21d5",
- "UpperLeftArrow;": u"\u2196",
- "UpperRightArrow;": u"\u2197",
- "Upsi;": u"\u03d2",
- "Upsilon;": u"\u03a5",
- "Uring;": u"\u016e",
- "Uscr;": u"\U0001d4b0",
- "Utilde;": u"\u0168",
- "Uuml": u"\xdc",
- "Uuml;": u"\xdc",
- "VDash;": u"\u22ab",
- "Vbar;": u"\u2aeb",
- "Vcy;": u"\u0412",
- "Vdash;": u"\u22a9",
- "Vdashl;": u"\u2ae6",
- "Vee;": u"\u22c1",
- "Verbar;": u"\u2016",
- "Vert;": u"\u2016",
- "VerticalBar;": u"\u2223",
- "VerticalLine;": u"|",
- "VerticalSeparator;": u"\u2758",
- "VerticalTilde;": u"\u2240",
- "VeryThinSpace;": u"\u200a",
- "Vfr;": u"\U0001d519",
- "Vopf;": u"\U0001d54d",
- "Vscr;": u"\U0001d4b1",
- "Vvdash;": u"\u22aa",
- "Wcirc;": u"\u0174",
- "Wedge;": u"\u22c0",
- "Wfr;": u"\U0001d51a",
- "Wopf;": u"\U0001d54e",
- "Wscr;": u"\U0001d4b2",
- "Xfr;": u"\U0001d51b",
- "Xi;": u"\u039e",
- "Xopf;": u"\U0001d54f",
- "Xscr;": u"\U0001d4b3",
- "YAcy;": u"\u042f",
- "YIcy;": u"\u0407",
- "YUcy;": u"\u042e",
- "Yacute": u"\xdd",
- "Yacute;": u"\xdd",
- "Ycirc;": u"\u0176",
- "Ycy;": u"\u042b",
- "Yfr;": u"\U0001d51c",
- "Yopf;": u"\U0001d550",
- "Yscr;": u"\U0001d4b4",
- "Yuml;": u"\u0178",
- "ZHcy;": u"\u0416",
- "Zacute;": u"\u0179",
- "Zcaron;": u"\u017d",
- "Zcy;": u"\u0417",
- "Zdot;": u"\u017b",
- "ZeroWidthSpace;": u"\u200b",
- "Zeta;": u"\u0396",
- "Zfr;": u"\u2128",
- "Zopf;": u"\u2124",
- "Zscr;": u"\U0001d4b5",
- "aacute": u"\xe1",
- "aacute;": u"\xe1",
- "abreve;": u"\u0103",
- "ac;": u"\u223e",
- "acE;": u"\u223e\u0333",
- "acd;": u"\u223f",
- "acirc": u"\xe2",
- "acirc;": u"\xe2",
- "acute": u"\xb4",
- "acute;": u"\xb4",
- "acy;": u"\u0430",
- "aelig": u"\xe6",
- "aelig;": u"\xe6",
- "af;": u"\u2061",
- "afr;": u"\U0001d51e",
- "agrave": u"\xe0",
- "agrave;": u"\xe0",
- "alefsym;": u"\u2135",
- "aleph;": u"\u2135",
- "alpha;": u"\u03b1",
- "amacr;": u"\u0101",
- "amalg;": u"\u2a3f",
- "amp": u"&",
- "amp;": u"&",
- "and;": u"\u2227",
- "andand;": u"\u2a55",
- "andd;": u"\u2a5c",
- "andslope;": u"\u2a58",
- "andv;": u"\u2a5a",
- "ang;": u"\u2220",
- "ange;": u"\u29a4",
- "angle;": u"\u2220",
- "angmsd;": u"\u2221",
- "angmsdaa;": u"\u29a8",
- "angmsdab;": u"\u29a9",
- "angmsdac;": u"\u29aa",
- "angmsdad;": u"\u29ab",
- "angmsdae;": u"\u29ac",
- "angmsdaf;": u"\u29ad",
- "angmsdag;": u"\u29ae",
- "angmsdah;": u"\u29af",
- "angrt;": u"\u221f",
- "angrtvb;": u"\u22be",
- "angrtvbd;": u"\u299d",
- "angsph;": u"\u2222",
- "angst;": u"\xc5",
- "angzarr;": u"\u237c",
- "aogon;": u"\u0105",
- "aopf;": u"\U0001d552",
- "ap;": u"\u2248",
- "apE;": u"\u2a70",
- "apacir;": u"\u2a6f",
- "ape;": u"\u224a",
- "apid;": u"\u224b",
- "apos;": u"'",
- "approx;": u"\u2248",
- "approxeq;": u"\u224a",
- "aring": u"\xe5",
- "aring;": u"\xe5",
- "ascr;": u"\U0001d4b6",
- "ast;": u"*",
- "asymp;": u"\u2248",
- "asympeq;": u"\u224d",
- "atilde": u"\xe3",
- "atilde;": u"\xe3",
- "auml": u"\xe4",
- "auml;": u"\xe4",
- "awconint;": u"\u2233",
- "awint;": u"\u2a11",
- "bNot;": u"\u2aed",
- "backcong;": u"\u224c",
- "backepsilon;": u"\u03f6",
- "backprime;": u"\u2035",
- "backsim;": u"\u223d",
- "backsimeq;": u"\u22cd",
- "barvee;": u"\u22bd",
- "barwed;": u"\u2305",
- "barwedge;": u"\u2305",
- "bbrk;": u"\u23b5",
- "bbrktbrk;": u"\u23b6",
- "bcong;": u"\u224c",
- "bcy;": u"\u0431",
- "bdquo;": u"\u201e",
- "becaus;": u"\u2235",
- "because;": u"\u2235",
- "bemptyv;": u"\u29b0",
- "bepsi;": u"\u03f6",
- "bernou;": u"\u212c",
- "beta;": u"\u03b2",
- "beth;": u"\u2136",
- "between;": u"\u226c",
- "bfr;": u"\U0001d51f",
- "bigcap;": u"\u22c2",
- "bigcirc;": u"\u25ef",
- "bigcup;": u"\u22c3",
- "bigodot;": u"\u2a00",
- "bigoplus;": u"\u2a01",
- "bigotimes;": u"\u2a02",
- "bigsqcup;": u"\u2a06",
- "bigstar;": u"\u2605",
- "bigtriangledown;": u"\u25bd",
- "bigtriangleup;": u"\u25b3",
- "biguplus;": u"\u2a04",
- "bigvee;": u"\u22c1",
- "bigwedge;": u"\u22c0",
- "bkarow;": u"\u290d",
- "blacklozenge;": u"\u29eb",
- "blacksquare;": u"\u25aa",
- "blacktriangle;": u"\u25b4",
- "blacktriangledown;": u"\u25be",
- "blacktriangleleft;": u"\u25c2",
- "blacktriangleright;": u"\u25b8",
- "blank;": u"\u2423",
- "blk12;": u"\u2592",
- "blk14;": u"\u2591",
- "blk34;": u"\u2593",
- "block;": u"\u2588",
- "bne;": u"=\u20e5",
- "bnequiv;": u"\u2261\u20e5",
- "bnot;": u"\u2310",
- "bopf;": u"\U0001d553",
- "bot;": u"\u22a5",
- "bottom;": u"\u22a5",
- "bowtie;": u"\u22c8",
- "boxDL;": u"\u2557",
- "boxDR;": u"\u2554",
- "boxDl;": u"\u2556",
- "boxDr;": u"\u2553",
- "boxH;": u"\u2550",
- "boxHD;": u"\u2566",
- "boxHU;": u"\u2569",
- "boxHd;": u"\u2564",
- "boxHu;": u"\u2567",
- "boxUL;": u"\u255d",
- "boxUR;": u"\u255a",
- "boxUl;": u"\u255c",
- "boxUr;": u"\u2559",
- "boxV;": u"\u2551",
- "boxVH;": u"\u256c",
- "boxVL;": u"\u2563",
- "boxVR;": u"\u2560",
- "boxVh;": u"\u256b",
- "boxVl;": u"\u2562",
- "boxVr;": u"\u255f",
- "boxbox;": u"\u29c9",
- "boxdL;": u"\u2555",
- "boxdR;": u"\u2552",
- "boxdl;": u"\u2510",
- "boxdr;": u"\u250c",
- "boxh;": u"\u2500",
- "boxhD;": u"\u2565",
- "boxhU;": u"\u2568",
- "boxhd;": u"\u252c",
- "boxhu;": u"\u2534",
- "boxminus;": u"\u229f",
- "boxplus;": u"\u229e",
- "boxtimes;": u"\u22a0",
- "boxuL;": u"\u255b",
- "boxuR;": u"\u2558",
- "boxul;": u"\u2518",
- "boxur;": u"\u2514",
- "boxv;": u"\u2502",
- "boxvH;": u"\u256a",
- "boxvL;": u"\u2561",
- "boxvR;": u"\u255e",
- "boxvh;": u"\u253c",
- "boxvl;": u"\u2524",
- "boxvr;": u"\u251c",
- "bprime;": u"\u2035",
- "breve;": u"\u02d8",
- "brvbar": u"\xa6",
- "brvbar;": u"\xa6",
- "bscr;": u"\U0001d4b7",
- "bsemi;": u"\u204f",
- "bsim;": u"\u223d",
- "bsime;": u"\u22cd",
- "bsol;": u"\\",
- "bsolb;": u"\u29c5",
- "bsolhsub;": u"\u27c8",
- "bull;": u"\u2022",
- "bullet;": u"\u2022",
- "bump;": u"\u224e",
- "bumpE;": u"\u2aae",
- "bumpe;": u"\u224f",
- "bumpeq;": u"\u224f",
- "cacute;": u"\u0107",
- "cap;": u"\u2229",
- "capand;": u"\u2a44",
- "capbrcup;": u"\u2a49",
- "capcap;": u"\u2a4b",
- "capcup;": u"\u2a47",
- "capdot;": u"\u2a40",
- "caps;": u"\u2229\ufe00",
- "caret;": u"\u2041",
- "caron;": u"\u02c7",
- "ccaps;": u"\u2a4d",
- "ccaron;": u"\u010d",
- "ccedil": u"\xe7",
- "ccedil;": u"\xe7",
- "ccirc;": u"\u0109",
- "ccups;": u"\u2a4c",
- "ccupssm;": u"\u2a50",
- "cdot;": u"\u010b",
- "cedil": u"\xb8",
- "cedil;": u"\xb8",
- "cemptyv;": u"\u29b2",
- "cent": u"\xa2",
- "cent;": u"\xa2",
- "centerdot;": u"\xb7",
- "cfr;": u"\U0001d520",
- "chcy;": u"\u0447",
- "check;": u"\u2713",
- "checkmark;": u"\u2713",
- "chi;": u"\u03c7",
- "cir;": u"\u25cb",
- "cirE;": u"\u29c3",
- "circ;": u"\u02c6",
- "circeq;": u"\u2257",
- "circlearrowleft;": u"\u21ba",
- "circlearrowright;": u"\u21bb",
- "circledR;": u"\xae",
- "circledS;": u"\u24c8",
- "circledast;": u"\u229b",
- "circledcirc;": u"\u229a",
- "circleddash;": u"\u229d",
- "cire;": u"\u2257",
- "cirfnint;": u"\u2a10",
- "cirmid;": u"\u2aef",
- "cirscir;": u"\u29c2",
- "clubs;": u"\u2663",
- "clubsuit;": u"\u2663",
- "colon;": u":",
- "colone;": u"\u2254",
- "coloneq;": u"\u2254",
- "comma;": u",",
- "commat;": u"@",
- "comp;": u"\u2201",
- "compfn;": u"\u2218",
- "complement;": u"\u2201",
- "complexes;": u"\u2102",
- "cong;": u"\u2245",
- "congdot;": u"\u2a6d",
- "conint;": u"\u222e",
- "copf;": u"\U0001d554",
- "coprod;": u"\u2210",
- "copy": u"\xa9",
- "copy;": u"\xa9",
- "copysr;": u"\u2117",
- "crarr;": u"\u21b5",
- "cross;": u"\u2717",
- "cscr;": u"\U0001d4b8",
- "csub;": u"\u2acf",
- "csube;": u"\u2ad1",
- "csup;": u"\u2ad0",
- "csupe;": u"\u2ad2",
- "ctdot;": u"\u22ef",
- "cudarrl;": u"\u2938",
- "cudarrr;": u"\u2935",
- "cuepr;": u"\u22de",
- "cuesc;": u"\u22df",
- "cularr;": u"\u21b6",
- "cularrp;": u"\u293d",
- "cup;": u"\u222a",
- "cupbrcap;": u"\u2a48",
- "cupcap;": u"\u2a46",
- "cupcup;": u"\u2a4a",
- "cupdot;": u"\u228d",
- "cupor;": u"\u2a45",
- "cups;": u"\u222a\ufe00",
- "curarr;": u"\u21b7",
- "curarrm;": u"\u293c",
- "curlyeqprec;": u"\u22de",
- "curlyeqsucc;": u"\u22df",
- "curlyvee;": u"\u22ce",
- "curlywedge;": u"\u22cf",
- "curren": u"\xa4",
- "curren;": u"\xa4",
- "curvearrowleft;": u"\u21b6",
- "curvearrowright;": u"\u21b7",
- "cuvee;": u"\u22ce",
- "cuwed;": u"\u22cf",
- "cwconint;": u"\u2232",
- "cwint;": u"\u2231",
- "cylcty;": u"\u232d",
- "dArr;": u"\u21d3",
- "dHar;": u"\u2965",
- "dagger;": u"\u2020",
- "daleth;": u"\u2138",
- "darr;": u"\u2193",
- "dash;": u"\u2010",
- "dashv;": u"\u22a3",
- "dbkarow;": u"\u290f",
- "dblac;": u"\u02dd",
- "dcaron;": u"\u010f",
- "dcy;": u"\u0434",
- "dd;": u"\u2146",
- "ddagger;": u"\u2021",
- "ddarr;": u"\u21ca",
- "ddotseq;": u"\u2a77",
- "deg": u"\xb0",
- "deg;": u"\xb0",
- "delta;": u"\u03b4",
- "demptyv;": u"\u29b1",
- "dfisht;": u"\u297f",
- "dfr;": u"\U0001d521",
- "dharl;": u"\u21c3",
- "dharr;": u"\u21c2",
- "diam;": u"\u22c4",
- "diamond;": u"\u22c4",
- "diamondsuit;": u"\u2666",
- "diams;": u"\u2666",
- "die;": u"\xa8",
- "digamma;": u"\u03dd",
- "disin;": u"\u22f2",
- "div;": u"\xf7",
- "divide": u"\xf7",
- "divide;": u"\xf7",
- "divideontimes;": u"\u22c7",
- "divonx;": u"\u22c7",
- "djcy;": u"\u0452",
- "dlcorn;": u"\u231e",
- "dlcrop;": u"\u230d",
- "dollar;": u"$",
- "dopf;": u"\U0001d555",
- "dot;": u"\u02d9",
- "doteq;": u"\u2250",
- "doteqdot;": u"\u2251",
- "dotminus;": u"\u2238",
- "dotplus;": u"\u2214",
- "dotsquare;": u"\u22a1",
- "doublebarwedge;": u"\u2306",
- "downarrow;": u"\u2193",
- "downdownarrows;": u"\u21ca",
- "downharpoonleft;": u"\u21c3",
- "downharpoonright;": u"\u21c2",
- "drbkarow;": u"\u2910",
- "drcorn;": u"\u231f",
- "drcrop;": u"\u230c",
- "dscr;": u"\U0001d4b9",
- "dscy;": u"\u0455",
- "dsol;": u"\u29f6",
- "dstrok;": u"\u0111",
- "dtdot;": u"\u22f1",
- "dtri;": u"\u25bf",
- "dtrif;": u"\u25be",
- "duarr;": u"\u21f5",
- "duhar;": u"\u296f",
- "dwangle;": u"\u29a6",
- "dzcy;": u"\u045f",
- "dzigrarr;": u"\u27ff",
- "eDDot;": u"\u2a77",
- "eDot;": u"\u2251",
- "eacute": u"\xe9",
- "eacute;": u"\xe9",
- "easter;": u"\u2a6e",
- "ecaron;": u"\u011b",
- "ecir;": u"\u2256",
- "ecirc": u"\xea",
- "ecirc;": u"\xea",
- "ecolon;": u"\u2255",
- "ecy;": u"\u044d",
- "edot;": u"\u0117",
- "ee;": u"\u2147",
- "efDot;": u"\u2252",
- "efr;": u"\U0001d522",
- "eg;": u"\u2a9a",
- "egrave": u"\xe8",
- "egrave;": u"\xe8",
- "egs;": u"\u2a96",
- "egsdot;": u"\u2a98",
- "el;": u"\u2a99",
- "elinters;": u"\u23e7",
- "ell;": u"\u2113",
- "els;": u"\u2a95",
- "elsdot;": u"\u2a97",
- "emacr;": u"\u0113",
- "empty;": u"\u2205",
- "emptyset;": u"\u2205",
- "emptyv;": u"\u2205",
- "emsp13;": u"\u2004",
- "emsp14;": u"\u2005",
- "emsp;": u"\u2003",
- "eng;": u"\u014b",
- "ensp;": u"\u2002",
- "eogon;": u"\u0119",
- "eopf;": u"\U0001d556",
- "epar;": u"\u22d5",
- "eparsl;": u"\u29e3",
- "eplus;": u"\u2a71",
- "epsi;": u"\u03b5",
- "epsilon;": u"\u03b5",
- "epsiv;": u"\u03f5",
- "eqcirc;": u"\u2256",
- "eqcolon;": u"\u2255",
- "eqsim;": u"\u2242",
- "eqslantgtr;": u"\u2a96",
- "eqslantless;": u"\u2a95",
- "equals;": u"=",
- "equest;": u"\u225f",
- "equiv;": u"\u2261",
- "equivDD;": u"\u2a78",
- "eqvparsl;": u"\u29e5",
- "erDot;": u"\u2253",
- "erarr;": u"\u2971",
- "escr;": u"\u212f",
- "esdot;": u"\u2250",
- "esim;": u"\u2242",
- "eta;": u"\u03b7",
- "eth": u"\xf0",
- "eth;": u"\xf0",
- "euml": u"\xeb",
- "euml;": u"\xeb",
- "euro;": u"\u20ac",
- "excl;": u"!",
- "exist;": u"\u2203",
- "expectation;": u"\u2130",
- "exponentiale;": u"\u2147",
- "fallingdotseq;": u"\u2252",
- "fcy;": u"\u0444",
- "female;": u"\u2640",
- "ffilig;": u"\ufb03",
- "fflig;": u"\ufb00",
- "ffllig;": u"\ufb04",
- "ffr;": u"\U0001d523",
- "filig;": u"\ufb01",
- "fjlig;": u"fj",
- "flat;": u"\u266d",
- "fllig;": u"\ufb02",
- "fltns;": u"\u25b1",
- "fnof;": u"\u0192",
- "fopf;": u"\U0001d557",
- "forall;": u"\u2200",
- "fork;": u"\u22d4",
- "forkv;": u"\u2ad9",
- "fpartint;": u"\u2a0d",
- "frac12": u"\xbd",
- "frac12;": u"\xbd",
- "frac13;": u"\u2153",
- "frac14": u"\xbc",
- "frac14;": u"\xbc",
- "frac15;": u"\u2155",
- "frac16;": u"\u2159",
- "frac18;": u"\u215b",
- "frac23;": u"\u2154",
- "frac25;": u"\u2156",
- "frac34": u"\xbe",
- "frac34;": u"\xbe",
- "frac35;": u"\u2157",
- "frac38;": u"\u215c",
- "frac45;": u"\u2158",
- "frac56;": u"\u215a",
- "frac58;": u"\u215d",
- "frac78;": u"\u215e",
- "frasl;": u"\u2044",
- "frown;": u"\u2322",
- "fscr;": u"\U0001d4bb",
- "gE;": u"\u2267",
- "gEl;": u"\u2a8c",
- "gacute;": u"\u01f5",
- "gamma;": u"\u03b3",
- "gammad;": u"\u03dd",
- "gap;": u"\u2a86",
- "gbreve;": u"\u011f",
- "gcirc;": u"\u011d",
- "gcy;": u"\u0433",
- "gdot;": u"\u0121",
- "ge;": u"\u2265",
- "gel;": u"\u22db",
- "geq;": u"\u2265",
- "geqq;": u"\u2267",
- "geqslant;": u"\u2a7e",
- "ges;": u"\u2a7e",
- "gescc;": u"\u2aa9",
- "gesdot;": u"\u2a80",
- "gesdoto;": u"\u2a82",
- "gesdotol;": u"\u2a84",
- "gesl;": u"\u22db\ufe00",
- "gesles;": u"\u2a94",
- "gfr;": u"\U0001d524",
- "gg;": u"\u226b",
- "ggg;": u"\u22d9",
- "gimel;": u"\u2137",
- "gjcy;": u"\u0453",
- "gl;": u"\u2277",
- "glE;": u"\u2a92",
- "gla;": u"\u2aa5",
- "glj;": u"\u2aa4",
- "gnE;": u"\u2269",
- "gnap;": u"\u2a8a",
- "gnapprox;": u"\u2a8a",
- "gne;": u"\u2a88",
- "gneq;": u"\u2a88",
- "gneqq;": u"\u2269",
- "gnsim;": u"\u22e7",
- "gopf;": u"\U0001d558",
- "grave;": u"`",
- "gscr;": u"\u210a",
- "gsim;": u"\u2273",
- "gsime;": u"\u2a8e",
- "gsiml;": u"\u2a90",
- "gt": u">",
- "gt;": u">",
- "gtcc;": u"\u2aa7",
- "gtcir;": u"\u2a7a",
- "gtdot;": u"\u22d7",
- "gtlPar;": u"\u2995",
- "gtquest;": u"\u2a7c",
- "gtrapprox;": u"\u2a86",
- "gtrarr;": u"\u2978",
- "gtrdot;": u"\u22d7",
- "gtreqless;": u"\u22db",
- "gtreqqless;": u"\u2a8c",
- "gtrless;": u"\u2277",
- "gtrsim;": u"\u2273",
- "gvertneqq;": u"\u2269\ufe00",
- "gvnE;": u"\u2269\ufe00",
- "hArr;": u"\u21d4",
- "hairsp;": u"\u200a",
- "half;": u"\xbd",
- "hamilt;": u"\u210b",
- "hardcy;": u"\u044a",
- "harr;": u"\u2194",
- "harrcir;": u"\u2948",
- "harrw;": u"\u21ad",
- "hbar;": u"\u210f",
- "hcirc;": u"\u0125",
- "hearts;": u"\u2665",
- "heartsuit;": u"\u2665",
- "hellip;": u"\u2026",
- "hercon;": u"\u22b9",
- "hfr;": u"\U0001d525",
- "hksearow;": u"\u2925",
- "hkswarow;": u"\u2926",
- "hoarr;": u"\u21ff",
- "homtht;": u"\u223b",
- "hookleftarrow;": u"\u21a9",
- "hookrightarrow;": u"\u21aa",
- "hopf;": u"\U0001d559",
- "horbar;": u"\u2015",
- "hscr;": u"\U0001d4bd",
- "hslash;": u"\u210f",
- "hstrok;": u"\u0127",
- "hybull;": u"\u2043",
- "hyphen;": u"\u2010",
- "iacute": u"\xed",
- "iacute;": u"\xed",
- "ic;": u"\u2063",
- "icirc": u"\xee",
- "icirc;": u"\xee",
- "icy;": u"\u0438",
- "iecy;": u"\u0435",
- "iexcl": u"\xa1",
- "iexcl;": u"\xa1",
- "iff;": u"\u21d4",
- "ifr;": u"\U0001d526",
- "igrave": u"\xec",
- "igrave;": u"\xec",
- "ii;": u"\u2148",
- "iiiint;": u"\u2a0c",
- "iiint;": u"\u222d",
- "iinfin;": u"\u29dc",
- "iiota;": u"\u2129",
- "ijlig;": u"\u0133",
- "imacr;": u"\u012b",
- "image;": u"\u2111",
- "imagline;": u"\u2110",
- "imagpart;": u"\u2111",
- "imath;": u"\u0131",
- "imof;": u"\u22b7",
- "imped;": u"\u01b5",
- "in;": u"\u2208",
- "incare;": u"\u2105",
- "infin;": u"\u221e",
- "infintie;": u"\u29dd",
- "inodot;": u"\u0131",
- "int;": u"\u222b",
- "intcal;": u"\u22ba",
- "integers;": u"\u2124",
- "intercal;": u"\u22ba",
- "intlarhk;": u"\u2a17",
- "intprod;": u"\u2a3c",
- "iocy;": u"\u0451",
- "iogon;": u"\u012f",
- "iopf;": u"\U0001d55a",
- "iota;": u"\u03b9",
- "iprod;": u"\u2a3c",
- "iquest": u"\xbf",
- "iquest;": u"\xbf",
- "iscr;": u"\U0001d4be",
- "isin;": u"\u2208",
- "isinE;": u"\u22f9",
- "isindot;": u"\u22f5",
- "isins;": u"\u22f4",
- "isinsv;": u"\u22f3",
- "isinv;": u"\u2208",
- "it;": u"\u2062",
- "itilde;": u"\u0129",
- "iukcy;": u"\u0456",
- "iuml": u"\xef",
- "iuml;": u"\xef",
- "jcirc;": u"\u0135",
- "jcy;": u"\u0439",
- "jfr;": u"\U0001d527",
- "jmath;": u"\u0237",
- "jopf;": u"\U0001d55b",
- "jscr;": u"\U0001d4bf",
- "jsercy;": u"\u0458",
- "jukcy;": u"\u0454",
- "kappa;": u"\u03ba",
- "kappav;": u"\u03f0",
- "kcedil;": u"\u0137",
- "kcy;": u"\u043a",
- "kfr;": u"\U0001d528",
- "kgreen;": u"\u0138",
- "khcy;": u"\u0445",
- "kjcy;": u"\u045c",
- "kopf;": u"\U0001d55c",
- "kscr;": u"\U0001d4c0",
- "lAarr;": u"\u21da",
- "lArr;": u"\u21d0",
- "lAtail;": u"\u291b",
- "lBarr;": u"\u290e",
- "lE;": u"\u2266",
- "lEg;": u"\u2a8b",
- "lHar;": u"\u2962",
- "lacute;": u"\u013a",
- "laemptyv;": u"\u29b4",
- "lagran;": u"\u2112",
- "lambda;": u"\u03bb",
- "lang;": u"\u27e8",
- "langd;": u"\u2991",
- "langle;": u"\u27e8",
- "lap;": u"\u2a85",
- "laquo": u"\xab",
- "laquo;": u"\xab",
- "larr;": u"\u2190",
- "larrb;": u"\u21e4",
- "larrbfs;": u"\u291f",
- "larrfs;": u"\u291d",
- "larrhk;": u"\u21a9",
- "larrlp;": u"\u21ab",
- "larrpl;": u"\u2939",
- "larrsim;": u"\u2973",
- "larrtl;": u"\u21a2",
- "lat;": u"\u2aab",
- "latail;": u"\u2919",
- "late;": u"\u2aad",
- "lates;": u"\u2aad\ufe00",
- "lbarr;": u"\u290c",
- "lbbrk;": u"\u2772",
- "lbrace;": u"{",
- "lbrack;": u"[",
- "lbrke;": u"\u298b",
- "lbrksld;": u"\u298f",
- "lbrkslu;": u"\u298d",
- "lcaron;": u"\u013e",
- "lcedil;": u"\u013c",
- "lceil;": u"\u2308",
- "lcub;": u"{",
- "lcy;": u"\u043b",
- "ldca;": u"\u2936",
- "ldquo;": u"\u201c",
- "ldquor;": u"\u201e",
- "ldrdhar;": u"\u2967",
- "ldrushar;": u"\u294b",
- "ldsh;": u"\u21b2",
- "le;": u"\u2264",
- "leftarrow;": u"\u2190",
- "leftarrowtail;": u"\u21a2",
- "leftharpoondown;": u"\u21bd",
- "leftharpoonup;": u"\u21bc",
- "leftleftarrows;": u"\u21c7",
- "leftrightarrow;": u"\u2194",
- "leftrightarrows;": u"\u21c6",
- "leftrightharpoons;": u"\u21cb",
- "leftrightsquigarrow;": u"\u21ad",
- "leftthreetimes;": u"\u22cb",
- "leg;": u"\u22da",
- "leq;": u"\u2264",
- "leqq;": u"\u2266",
- "leqslant;": u"\u2a7d",
- "les;": u"\u2a7d",
- "lescc;": u"\u2aa8",
- "lesdot;": u"\u2a7f",
- "lesdoto;": u"\u2a81",
- "lesdotor;": u"\u2a83",
- "lesg;": u"\u22da\ufe00",
- "lesges;": u"\u2a93",
- "lessapprox;": u"\u2a85",
- "lessdot;": u"\u22d6",
- "lesseqgtr;": u"\u22da",
- "lesseqqgtr;": u"\u2a8b",
- "lessgtr;": u"\u2276",
- "lesssim;": u"\u2272",
- "lfisht;": u"\u297c",
- "lfloor;": u"\u230a",
- "lfr;": u"\U0001d529",
- "lg;": u"\u2276",
- "lgE;": u"\u2a91",
- "lhard;": u"\u21bd",
- "lharu;": u"\u21bc",
- "lharul;": u"\u296a",
- "lhblk;": u"\u2584",
- "ljcy;": u"\u0459",
- "ll;": u"\u226a",
- "llarr;": u"\u21c7",
- "llcorner;": u"\u231e",
- "llhard;": u"\u296b",
- "lltri;": u"\u25fa",
- "lmidot;": u"\u0140",
- "lmoust;": u"\u23b0",
- "lmoustache;": u"\u23b0",
- "lnE;": u"\u2268",
- "lnap;": u"\u2a89",
- "lnapprox;": u"\u2a89",
- "lne;": u"\u2a87",
- "lneq;": u"\u2a87",
- "lneqq;": u"\u2268",
- "lnsim;": u"\u22e6",
- "loang;": u"\u27ec",
- "loarr;": u"\u21fd",
- "lobrk;": u"\u27e6",
- "longleftarrow;": u"\u27f5",
- "longleftrightarrow;": u"\u27f7",
- "longmapsto;": u"\u27fc",
- "longrightarrow;": u"\u27f6",
- "looparrowleft;": u"\u21ab",
- "looparrowright;": u"\u21ac",
- "lopar;": u"\u2985",
- "lopf;": u"\U0001d55d",
- "loplus;": u"\u2a2d",
- "lotimes;": u"\u2a34",
- "lowast;": u"\u2217",
- "lowbar;": u"_",
- "loz;": u"\u25ca",
- "lozenge;": u"\u25ca",
- "lozf;": u"\u29eb",
- "lpar;": u"(",
- "lparlt;": u"\u2993",
- "lrarr;": u"\u21c6",
- "lrcorner;": u"\u231f",
- "lrhar;": u"\u21cb",
- "lrhard;": u"\u296d",
- "lrm;": u"\u200e",
- "lrtri;": u"\u22bf",
- "lsaquo;": u"\u2039",
- "lscr;": u"\U0001d4c1",
- "lsh;": u"\u21b0",
- "lsim;": u"\u2272",
- "lsime;": u"\u2a8d",
- "lsimg;": u"\u2a8f",
- "lsqb;": u"[",
- "lsquo;": u"\u2018",
- "lsquor;": u"\u201a",
- "lstrok;": u"\u0142",
- "lt": u"<",
- "lt;": u"<",
- "ltcc;": u"\u2aa6",
- "ltcir;": u"\u2a79",
- "ltdot;": u"\u22d6",
- "lthree;": u"\u22cb",
- "ltimes;": u"\u22c9",
- "ltlarr;": u"\u2976",
- "ltquest;": u"\u2a7b",
- "ltrPar;": u"\u2996",
- "ltri;": u"\u25c3",
- "ltrie;": u"\u22b4",
- "ltrif;": u"\u25c2",
- "lurdshar;": u"\u294a",
- "luruhar;": u"\u2966",
- "lvertneqq;": u"\u2268\ufe00",
- "lvnE;": u"\u2268\ufe00",
- "mDDot;": u"\u223a",
- "macr": u"\xaf",
- "macr;": u"\xaf",
- "male;": u"\u2642",
- "malt;": u"\u2720",
- "maltese;": u"\u2720",
- "map;": u"\u21a6",
- "mapsto;": u"\u21a6",
- "mapstodown;": u"\u21a7",
- "mapstoleft;": u"\u21a4",
- "mapstoup;": u"\u21a5",
- "marker;": u"\u25ae",
- "mcomma;": u"\u2a29",
- "mcy;": u"\u043c",
- "mdash;": u"\u2014",
- "measuredangle;": u"\u2221",
- "mfr;": u"\U0001d52a",
- "mho;": u"\u2127",
- "micro": u"\xb5",
- "micro;": u"\xb5",
- "mid;": u"\u2223",
- "midast;": u"*",
- "midcir;": u"\u2af0",
- "middot": u"\xb7",
- "middot;": u"\xb7",
- "minus;": u"\u2212",
- "minusb;": u"\u229f",
- "minusd;": u"\u2238",
- "minusdu;": u"\u2a2a",
- "mlcp;": u"\u2adb",
- "mldr;": u"\u2026",
- "mnplus;": u"\u2213",
- "models;": u"\u22a7",
- "mopf;": u"\U0001d55e",
- "mp;": u"\u2213",
- "mscr;": u"\U0001d4c2",
- "mstpos;": u"\u223e",
- "mu;": u"\u03bc",
- "multimap;": u"\u22b8",
- "mumap;": u"\u22b8",
- "nGg;": u"\u22d9\u0338",
- "nGt;": u"\u226b\u20d2",
- "nGtv;": u"\u226b\u0338",
- "nLeftarrow;": u"\u21cd",
- "nLeftrightarrow;": u"\u21ce",
- "nLl;": u"\u22d8\u0338",
- "nLt;": u"\u226a\u20d2",
- "nLtv;": u"\u226a\u0338",
- "nRightarrow;": u"\u21cf",
- "nVDash;": u"\u22af",
- "nVdash;": u"\u22ae",
- "nabla;": u"\u2207",
- "nacute;": u"\u0144",
- "nang;": u"\u2220\u20d2",
- "nap;": u"\u2249",
- "napE;": u"\u2a70\u0338",
- "napid;": u"\u224b\u0338",
- "napos;": u"\u0149",
- "napprox;": u"\u2249",
- "natur;": u"\u266e",
- "natural;": u"\u266e",
- "naturals;": u"\u2115",
- "nbsp": u"\xa0",
- "nbsp;": u"\xa0",
- "nbump;": u"\u224e\u0338",
- "nbumpe;": u"\u224f\u0338",
- "ncap;": u"\u2a43",
- "ncaron;": u"\u0148",
- "ncedil;": u"\u0146",
- "ncong;": u"\u2247",
- "ncongdot;": u"\u2a6d\u0338",
- "ncup;": u"\u2a42",
- "ncy;": u"\u043d",
- "ndash;": u"\u2013",
- "ne;": u"\u2260",
- "neArr;": u"\u21d7",
- "nearhk;": u"\u2924",
- "nearr;": u"\u2197",
- "nearrow;": u"\u2197",
- "nedot;": u"\u2250\u0338",
- "nequiv;": u"\u2262",
- "nesear;": u"\u2928",
- "nesim;": u"\u2242\u0338",
- "nexist;": u"\u2204",
- "nexists;": u"\u2204",
- "nfr;": u"\U0001d52b",
- "ngE;": u"\u2267\u0338",
- "nge;": u"\u2271",
- "ngeq;": u"\u2271",
- "ngeqq;": u"\u2267\u0338",
- "ngeqslant;": u"\u2a7e\u0338",
- "nges;": u"\u2a7e\u0338",
- "ngsim;": u"\u2275",
- "ngt;": u"\u226f",
- "ngtr;": u"\u226f",
- "nhArr;": u"\u21ce",
- "nharr;": u"\u21ae",
- "nhpar;": u"\u2af2",
- "ni;": u"\u220b",
- "nis;": u"\u22fc",
- "nisd;": u"\u22fa",
- "niv;": u"\u220b",
- "njcy;": u"\u045a",
- "nlArr;": u"\u21cd",
- "nlE;": u"\u2266\u0338",
- "nlarr;": u"\u219a",
- "nldr;": u"\u2025",
- "nle;": u"\u2270",
- "nleftarrow;": u"\u219a",
- "nleftrightarrow;": u"\u21ae",
- "nleq;": u"\u2270",
- "nleqq;": u"\u2266\u0338",
- "nleqslant;": u"\u2a7d\u0338",
- "nles;": u"\u2a7d\u0338",
- "nless;": u"\u226e",
- "nlsim;": u"\u2274",
- "nlt;": u"\u226e",
- "nltri;": u"\u22ea",
- "nltrie;": u"\u22ec",
- "nmid;": u"\u2224",
- "nopf;": u"\U0001d55f",
- "not": u"\xac",
- "not;": u"\xac",
- "notin;": u"\u2209",
- "notinE;": u"\u22f9\u0338",
- "notindot;": u"\u22f5\u0338",
- "notinva;": u"\u2209",
- "notinvb;": u"\u22f7",
- "notinvc;": u"\u22f6",
- "notni;": u"\u220c",
- "notniva;": u"\u220c",
- "notnivb;": u"\u22fe",
- "notnivc;": u"\u22fd",
- "npar;": u"\u2226",
- "nparallel;": u"\u2226",
- "nparsl;": u"\u2afd\u20e5",
- "npart;": u"\u2202\u0338",
- "npolint;": u"\u2a14",
- "npr;": u"\u2280",
- "nprcue;": u"\u22e0",
- "npre;": u"\u2aaf\u0338",
- "nprec;": u"\u2280",
- "npreceq;": u"\u2aaf\u0338",
- "nrArr;": u"\u21cf",
- "nrarr;": u"\u219b",
- "nrarrc;": u"\u2933\u0338",
- "nrarrw;": u"\u219d\u0338",
- "nrightarrow;": u"\u219b",
- "nrtri;": u"\u22eb",
- "nrtrie;": u"\u22ed",
- "nsc;": u"\u2281",
- "nsccue;": u"\u22e1",
- "nsce;": u"\u2ab0\u0338",
- "nscr;": u"\U0001d4c3",
- "nshortmid;": u"\u2224",
- "nshortparallel;": u"\u2226",
- "nsim;": u"\u2241",
- "nsime;": u"\u2244",
- "nsimeq;": u"\u2244",
- "nsmid;": u"\u2224",
- "nspar;": u"\u2226",
- "nsqsube;": u"\u22e2",
- "nsqsupe;": u"\u22e3",
- "nsub;": u"\u2284",
- "nsubE;": u"\u2ac5\u0338",
- "nsube;": u"\u2288",
- "nsubset;": u"\u2282\u20d2",
- "nsubseteq;": u"\u2288",
- "nsubseteqq;": u"\u2ac5\u0338",
- "nsucc;": u"\u2281",
- "nsucceq;": u"\u2ab0\u0338",
- "nsup;": u"\u2285",
- "nsupE;": u"\u2ac6\u0338",
- "nsupe;": u"\u2289",
- "nsupset;": u"\u2283\u20d2",
- "nsupseteq;": u"\u2289",
- "nsupseteqq;": u"\u2ac6\u0338",
- "ntgl;": u"\u2279",
- "ntilde": u"\xf1",
- "ntilde;": u"\xf1",
- "ntlg;": u"\u2278",
- "ntriangleleft;": u"\u22ea",
- "ntrianglelefteq;": u"\u22ec",
- "ntriangleright;": u"\u22eb",
- "ntrianglerighteq;": u"\u22ed",
- "nu;": u"\u03bd",
- "num;": u"#",
- "numero;": u"\u2116",
- "numsp;": u"\u2007",
- "nvDash;": u"\u22ad",
- "nvHarr;": u"\u2904",
- "nvap;": u"\u224d\u20d2",
- "nvdash;": u"\u22ac",
- "nvge;": u"\u2265\u20d2",
- "nvgt;": u">\u20d2",
- "nvinfin;": u"\u29de",
- "nvlArr;": u"\u2902",
- "nvle;": u"\u2264\u20d2",
- "nvlt;": u"<\u20d2",
- "nvltrie;": u"\u22b4\u20d2",
- "nvrArr;": u"\u2903",
- "nvrtrie;": u"\u22b5\u20d2",
- "nvsim;": u"\u223c\u20d2",
- "nwArr;": u"\u21d6",
- "nwarhk;": u"\u2923",
- "nwarr;": u"\u2196",
- "nwarrow;": u"\u2196",
- "nwnear;": u"\u2927",
- "oS;": u"\u24c8",
- "oacute": u"\xf3",
- "oacute;": u"\xf3",
- "oast;": u"\u229b",
- "ocir;": u"\u229a",
- "ocirc": u"\xf4",
- "ocirc;": u"\xf4",
- "ocy;": u"\u043e",
- "odash;": u"\u229d",
- "odblac;": u"\u0151",
- "odiv;": u"\u2a38",
- "odot;": u"\u2299",
- "odsold;": u"\u29bc",
- "oelig;": u"\u0153",
- "ofcir;": u"\u29bf",
- "ofr;": u"\U0001d52c",
- "ogon;": u"\u02db",
- "ograve": u"\xf2",
- "ograve;": u"\xf2",
- "ogt;": u"\u29c1",
- "ohbar;": u"\u29b5",
- "ohm;": u"\u03a9",
- "oint;": u"\u222e",
- "olarr;": u"\u21ba",
- "olcir;": u"\u29be",
- "olcross;": u"\u29bb",
- "oline;": u"\u203e",
- "olt;": u"\u29c0",
- "omacr;": u"\u014d",
- "omega;": u"\u03c9",
- "omicron;": u"\u03bf",
- "omid;": u"\u29b6",
- "ominus;": u"\u2296",
- "oopf;": u"\U0001d560",
- "opar;": u"\u29b7",
- "operp;": u"\u29b9",
- "oplus;": u"\u2295",
- "or;": u"\u2228",
- "orarr;": u"\u21bb",
- "ord;": u"\u2a5d",
- "order;": u"\u2134",
- "orderof;": u"\u2134",
- "ordf": u"\xaa",
- "ordf;": u"\xaa",
- "ordm": u"\xba",
- "ordm;": u"\xba",
- "origof;": u"\u22b6",
- "oror;": u"\u2a56",
- "orslope;": u"\u2a57",
- "orv;": u"\u2a5b",
- "oscr;": u"\u2134",
- "oslash": u"\xf8",
- "oslash;": u"\xf8",
- "osol;": u"\u2298",
- "otilde": u"\xf5",
- "otilde;": u"\xf5",
- "otimes;": u"\u2297",
- "otimesas;": u"\u2a36",
- "ouml": u"\xf6",
- "ouml;": u"\xf6",
- "ovbar;": u"\u233d",
- "par;": u"\u2225",
- "para": u"\xb6",
- "para;": u"\xb6",
- "parallel;": u"\u2225",
- "parsim;": u"\u2af3",
- "parsl;": u"\u2afd",
- "part;": u"\u2202",
- "pcy;": u"\u043f",
- "percnt;": u"%",
- "period;": u".",
- "permil;": u"\u2030",
- "perp;": u"\u22a5",
- "pertenk;": u"\u2031",
- "pfr;": u"\U0001d52d",
- "phi;": u"\u03c6",
- "phiv;": u"\u03d5",
- "phmmat;": u"\u2133",
- "phone;": u"\u260e",
- "pi;": u"\u03c0",
- "pitchfork;": u"\u22d4",
- "piv;": u"\u03d6",
- "planck;": u"\u210f",
- "planckh;": u"\u210e",
- "plankv;": u"\u210f",
- "plus;": u"+",
- "plusacir;": u"\u2a23",
- "plusb;": u"\u229e",
- "pluscir;": u"\u2a22",
- "plusdo;": u"\u2214",
- "plusdu;": u"\u2a25",
- "pluse;": u"\u2a72",
- "plusmn": u"\xb1",
- "plusmn;": u"\xb1",
- "plussim;": u"\u2a26",
- "plustwo;": u"\u2a27",
- "pm;": u"\xb1",
- "pointint;": u"\u2a15",
- "popf;": u"\U0001d561",
- "pound": u"\xa3",
- "pound;": u"\xa3",
- "pr;": u"\u227a",
- "prE;": u"\u2ab3",
- "prap;": u"\u2ab7",
- "prcue;": u"\u227c",
- "pre;": u"\u2aaf",
- "prec;": u"\u227a",
- "precapprox;": u"\u2ab7",
- "preccurlyeq;": u"\u227c",
- "preceq;": u"\u2aaf",
- "precnapprox;": u"\u2ab9",
- "precneqq;": u"\u2ab5",
- "precnsim;": u"\u22e8",
- "precsim;": u"\u227e",
- "prime;": u"\u2032",
- "primes;": u"\u2119",
- "prnE;": u"\u2ab5",
- "prnap;": u"\u2ab9",
- "prnsim;": u"\u22e8",
- "prod;": u"\u220f",
- "profalar;": u"\u232e",
- "profline;": u"\u2312",
- "profsurf;": u"\u2313",
- "prop;": u"\u221d",
- "propto;": u"\u221d",
- "prsim;": u"\u227e",
- "prurel;": u"\u22b0",
- "pscr;": u"\U0001d4c5",
- "psi;": u"\u03c8",
- "puncsp;": u"\u2008",
- "qfr;": u"\U0001d52e",
- "qint;": u"\u2a0c",
- "qopf;": u"\U0001d562",
- "qprime;": u"\u2057",
- "qscr;": u"\U0001d4c6",
- "quaternions;": u"\u210d",
- "quatint;": u"\u2a16",
- "quest;": u"?",
- "questeq;": u"\u225f",
- "quot": u"\"",
- "quot;": u"\"",
- "rAarr;": u"\u21db",
- "rArr;": u"\u21d2",
- "rAtail;": u"\u291c",
- "rBarr;": u"\u290f",
- "rHar;": u"\u2964",
- "race;": u"\u223d\u0331",
- "racute;": u"\u0155",
- "radic;": u"\u221a",
- "raemptyv;": u"\u29b3",
- "rang;": u"\u27e9",
- "rangd;": u"\u2992",
- "range;": u"\u29a5",
- "rangle;": u"\u27e9",
- "raquo": u"\xbb",
- "raquo;": u"\xbb",
- "rarr;": u"\u2192",
- "rarrap;": u"\u2975",
- "rarrb;": u"\u21e5",
- "rarrbfs;": u"\u2920",
- "rarrc;": u"\u2933",
- "rarrfs;": u"\u291e",
- "rarrhk;": u"\u21aa",
- "rarrlp;": u"\u21ac",
- "rarrpl;": u"\u2945",
- "rarrsim;": u"\u2974",
- "rarrtl;": u"\u21a3",
- "rarrw;": u"\u219d",
- "ratail;": u"\u291a",
- "ratio;": u"\u2236",
- "rationals;": u"\u211a",
- "rbarr;": u"\u290d",
- "rbbrk;": u"\u2773",
- "rbrace;": u"}",
- "rbrack;": u"]",
- "rbrke;": u"\u298c",
- "rbrksld;": u"\u298e",
- "rbrkslu;": u"\u2990",
- "rcaron;": u"\u0159",
- "rcedil;": u"\u0157",
- "rceil;": u"\u2309",
- "rcub;": u"}",
- "rcy;": u"\u0440",
- "rdca;": u"\u2937",
- "rdldhar;": u"\u2969",
- "rdquo;": u"\u201d",
- "rdquor;": u"\u201d",
- "rdsh;": u"\u21b3",
- "real;": u"\u211c",
- "realine;": u"\u211b",
- "realpart;": u"\u211c",
- "reals;": u"\u211d",
- "rect;": u"\u25ad",
- "reg": u"\xae",
- "reg;": u"\xae",
- "rfisht;": u"\u297d",
- "rfloor;": u"\u230b",
- "rfr;": u"\U0001d52f",
- "rhard;": u"\u21c1",
- "rharu;": u"\u21c0",
- "rharul;": u"\u296c",
- "rho;": u"\u03c1",
- "rhov;": u"\u03f1",
- "rightarrow;": u"\u2192",
- "rightarrowtail;": u"\u21a3",
- "rightharpoondown;": u"\u21c1",
- "rightharpoonup;": u"\u21c0",
- "rightleftarrows;": u"\u21c4",
- "rightleftharpoons;": u"\u21cc",
- "rightrightarrows;": u"\u21c9",
- "rightsquigarrow;": u"\u219d",
- "rightthreetimes;": u"\u22cc",
- "ring;": u"\u02da",
- "risingdotseq;": u"\u2253",
- "rlarr;": u"\u21c4",
- "rlhar;": u"\u21cc",
- "rlm;": u"\u200f",
- "rmoust;": u"\u23b1",
- "rmoustache;": u"\u23b1",
- "rnmid;": u"\u2aee",
- "roang;": u"\u27ed",
- "roarr;": u"\u21fe",
- "robrk;": u"\u27e7",
- "ropar;": u"\u2986",
- "ropf;": u"\U0001d563",
- "roplus;": u"\u2a2e",
- "rotimes;": u"\u2a35",
- "rpar;": u")",
- "rpargt;": u"\u2994",
- "rppolint;": u"\u2a12",
- "rrarr;": u"\u21c9",
- "rsaquo;": u"\u203a",
- "rscr;": u"\U0001d4c7",
- "rsh;": u"\u21b1",
- "rsqb;": u"]",
- "rsquo;": u"\u2019",
- "rsquor;": u"\u2019",
- "rthree;": u"\u22cc",
- "rtimes;": u"\u22ca",
- "rtri;": u"\u25b9",
- "rtrie;": u"\u22b5",
- "rtrif;": u"\u25b8",
- "rtriltri;": u"\u29ce",
- "ruluhar;": u"\u2968",
- "rx;": u"\u211e",
- "sacute;": u"\u015b",
- "sbquo;": u"\u201a",
- "sc;": u"\u227b",
- "scE;": u"\u2ab4",
- "scap;": u"\u2ab8",
- "scaron;": u"\u0161",
- "sccue;": u"\u227d",
- "sce;": u"\u2ab0",
- "scedil;": u"\u015f",
- "scirc;": u"\u015d",
- "scnE;": u"\u2ab6",
- "scnap;": u"\u2aba",
- "scnsim;": u"\u22e9",
- "scpolint;": u"\u2a13",
- "scsim;": u"\u227f",
- "scy;": u"\u0441",
- "sdot;": u"\u22c5",
- "sdotb;": u"\u22a1",
- "sdote;": u"\u2a66",
- "seArr;": u"\u21d8",
- "searhk;": u"\u2925",
- "searr;": u"\u2198",
- "searrow;": u"\u2198",
- "sect": u"\xa7",
- "sect;": u"\xa7",
- "semi;": u";",
- "seswar;": u"\u2929",
- "setminus;": u"\u2216",
- "setmn;": u"\u2216",
- "sext;": u"\u2736",
- "sfr;": u"\U0001d530",
- "sfrown;": u"\u2322",
- "sharp;": u"\u266f",
- "shchcy;": u"\u0449",
- "shcy;": u"\u0448",
- "shortmid;": u"\u2223",
- "shortparallel;": u"\u2225",
- "shy": u"\xad",
- "shy;": u"\xad",
- "sigma;": u"\u03c3",
- "sigmaf;": u"\u03c2",
- "sigmav;": u"\u03c2",
- "sim;": u"\u223c",
- "simdot;": u"\u2a6a",
- "sime;": u"\u2243",
- "simeq;": u"\u2243",
- "simg;": u"\u2a9e",
- "simgE;": u"\u2aa0",
- "siml;": u"\u2a9d",
- "simlE;": u"\u2a9f",
- "simne;": u"\u2246",
- "simplus;": u"\u2a24",
- "simrarr;": u"\u2972",
- "slarr;": u"\u2190",
- "smallsetminus;": u"\u2216",
- "smashp;": u"\u2a33",
- "smeparsl;": u"\u29e4",
- "smid;": u"\u2223",
- "smile;": u"\u2323",
- "smt;": u"\u2aaa",
- "smte;": u"\u2aac",
- "smtes;": u"\u2aac\ufe00",
- "softcy;": u"\u044c",
- "sol;": u"/",
- "solb;": u"\u29c4",
- "solbar;": u"\u233f",
- "sopf;": u"\U0001d564",
- "spades;": u"\u2660",
- "spadesuit;": u"\u2660",
- "spar;": u"\u2225",
- "sqcap;": u"\u2293",
- "sqcaps;": u"\u2293\ufe00",
- "sqcup;": u"\u2294",
- "sqcups;": u"\u2294\ufe00",
- "sqsub;": u"\u228f",
- "sqsube;": u"\u2291",
- "sqsubset;": u"\u228f",
- "sqsubseteq;": u"\u2291",
- "sqsup;": u"\u2290",
- "sqsupe;": u"\u2292",
- "sqsupset;": u"\u2290",
- "sqsupseteq;": u"\u2292",
- "squ;": u"\u25a1",
- "square;": u"\u25a1",
- "squarf;": u"\u25aa",
- "squf;": u"\u25aa",
- "srarr;": u"\u2192",
- "sscr;": u"\U0001d4c8",
- "ssetmn;": u"\u2216",
- "ssmile;": u"\u2323",
- "sstarf;": u"\u22c6",
- "star;": u"\u2606",
- "starf;": u"\u2605",
- "straightepsilon;": u"\u03f5",
- "straightphi;": u"\u03d5",
- "strns;": u"\xaf",
- "sub;": u"\u2282",
- "subE;": u"\u2ac5",
- "subdot;": u"\u2abd",
- "sube;": u"\u2286",
- "subedot;": u"\u2ac3",
- "submult;": u"\u2ac1",
- "subnE;": u"\u2acb",
- "subne;": u"\u228a",
- "subplus;": u"\u2abf",
- "subrarr;": u"\u2979",
- "subset;": u"\u2282",
- "subseteq;": u"\u2286",
- "subseteqq;": u"\u2ac5",
- "subsetneq;": u"\u228a",
- "subsetneqq;": u"\u2acb",
- "subsim;": u"\u2ac7",
- "subsub;": u"\u2ad5",
- "subsup;": u"\u2ad3",
- "succ;": u"\u227b",
- "succapprox;": u"\u2ab8",
- "succcurlyeq;": u"\u227d",
- "succeq;": u"\u2ab0",
- "succnapprox;": u"\u2aba",
- "succneqq;": u"\u2ab6",
- "succnsim;": u"\u22e9",
- "succsim;": u"\u227f",
- "sum;": u"\u2211",
- "sung;": u"\u266a",
- "sup1": u"\xb9",
- "sup1;": u"\xb9",
- "sup2": u"\xb2",
- "sup2;": u"\xb2",
- "sup3": u"\xb3",
- "sup3;": u"\xb3",
- "sup;": u"\u2283",
- "supE;": u"\u2ac6",
- "supdot;": u"\u2abe",
- "supdsub;": u"\u2ad8",
- "supe;": u"\u2287",
- "supedot;": u"\u2ac4",
- "suphsol;": u"\u27c9",
- "suphsub;": u"\u2ad7",
- "suplarr;": u"\u297b",
- "supmult;": u"\u2ac2",
- "supnE;": u"\u2acc",
- "supne;": u"\u228b",
- "supplus;": u"\u2ac0",
- "supset;": u"\u2283",
- "supseteq;": u"\u2287",
- "supseteqq;": u"\u2ac6",
- "supsetneq;": u"\u228b",
- "supsetneqq;": u"\u2acc",
- "supsim;": u"\u2ac8",
- "supsub;": u"\u2ad4",
- "supsup;": u"\u2ad6",
- "swArr;": u"\u21d9",
- "swarhk;": u"\u2926",
- "swarr;": u"\u2199",
- "swarrow;": u"\u2199",
- "swnwar;": u"\u292a",
- "szlig": u"\xdf",
- "szlig;": u"\xdf",
- "target;": u"\u2316",
- "tau;": u"\u03c4",
- "tbrk;": u"\u23b4",
- "tcaron;": u"\u0165",
- "tcedil;": u"\u0163",
- "tcy;": u"\u0442",
- "tdot;": u"\u20db",
- "telrec;": u"\u2315",
- "tfr;": u"\U0001d531",
- "there4;": u"\u2234",
- "therefore;": u"\u2234",
- "theta;": u"\u03b8",
- "thetasym;": u"\u03d1",
- "thetav;": u"\u03d1",
- "thickapprox;": u"\u2248",
- "thicksim;": u"\u223c",
- "thinsp;": u"\u2009",
- "thkap;": u"\u2248",
- "thksim;": u"\u223c",
- "thorn": u"\xfe",
- "thorn;": u"\xfe",
- "tilde;": u"\u02dc",
- "times": u"\xd7",
- "times;": u"\xd7",
- "timesb;": u"\u22a0",
- "timesbar;": u"\u2a31",
- "timesd;": u"\u2a30",
- "tint;": u"\u222d",
- "toea;": u"\u2928",
- "top;": u"\u22a4",
- "topbot;": u"\u2336",
- "topcir;": u"\u2af1",
- "topf;": u"\U0001d565",
- "topfork;": u"\u2ada",
- "tosa;": u"\u2929",
- "tprime;": u"\u2034",
- "trade;": u"\u2122",
- "triangle;": u"\u25b5",
- "triangledown;": u"\u25bf",
- "triangleleft;": u"\u25c3",
- "trianglelefteq;": u"\u22b4",
- "triangleq;": u"\u225c",
- "triangleright;": u"\u25b9",
- "trianglerighteq;": u"\u22b5",
- "tridot;": u"\u25ec",
- "trie;": u"\u225c",
- "triminus;": u"\u2a3a",
- "triplus;": u"\u2a39",
- "trisb;": u"\u29cd",
- "tritime;": u"\u2a3b",
- "trpezium;": u"\u23e2",
- "tscr;": u"\U0001d4c9",
- "tscy;": u"\u0446",
- "tshcy;": u"\u045b",
- "tstrok;": u"\u0167",
- "twixt;": u"\u226c",
- "twoheadleftarrow;": u"\u219e",
- "twoheadrightarrow;": u"\u21a0",
- "uArr;": u"\u21d1",
- "uHar;": u"\u2963",
- "uacute": u"\xfa",
- "uacute;": u"\xfa",
- "uarr;": u"\u2191",
- "ubrcy;": u"\u045e",
- "ubreve;": u"\u016d",
- "ucirc": u"\xfb",
- "ucirc;": u"\xfb",
- "ucy;": u"\u0443",
- "udarr;": u"\u21c5",
- "udblac;": u"\u0171",
- "udhar;": u"\u296e",
- "ufisht;": u"\u297e",
- "ufr;": u"\U0001d532",
- "ugrave": u"\xf9",
- "ugrave;": u"\xf9",
- "uharl;": u"\u21bf",
- "uharr;": u"\u21be",
- "uhblk;": u"\u2580",
- "ulcorn;": u"\u231c",
- "ulcorner;": u"\u231c",
- "ulcrop;": u"\u230f",
- "ultri;": u"\u25f8",
- "umacr;": u"\u016b",
- "uml": u"\xa8",
- "uml;": u"\xa8",
- "uogon;": u"\u0173",
- "uopf;": u"\U0001d566",
- "uparrow;": u"\u2191",
- "updownarrow;": u"\u2195",
- "upharpoonleft;": u"\u21bf",
- "upharpoonright;": u"\u21be",
- "uplus;": u"\u228e",
- "upsi;": u"\u03c5",
- "upsih;": u"\u03d2",
- "upsilon;": u"\u03c5",
- "upuparrows;": u"\u21c8",
- "urcorn;": u"\u231d",
- "urcorner;": u"\u231d",
- "urcrop;": u"\u230e",
- "uring;": u"\u016f",
- "urtri;": u"\u25f9",
- "uscr;": u"\U0001d4ca",
- "utdot;": u"\u22f0",
- "utilde;": u"\u0169",
- "utri;": u"\u25b5",
- "utrif;": u"\u25b4",
- "uuarr;": u"\u21c8",
- "uuml": u"\xfc",
- "uuml;": u"\xfc",
- "uwangle;": u"\u29a7",
- "vArr;": u"\u21d5",
- "vBar;": u"\u2ae8",
- "vBarv;": u"\u2ae9",
- "vDash;": u"\u22a8",
- "vangrt;": u"\u299c",
- "varepsilon;": u"\u03f5",
- "varkappa;": u"\u03f0",
- "varnothing;": u"\u2205",
- "varphi;": u"\u03d5",
- "varpi;": u"\u03d6",
- "varpropto;": u"\u221d",
- "varr;": u"\u2195",
- "varrho;": u"\u03f1",
- "varsigma;": u"\u03c2",
- "varsubsetneq;": u"\u228a\ufe00",
- "varsubsetneqq;": u"\u2acb\ufe00",
- "varsupsetneq;": u"\u228b\ufe00",
- "varsupsetneqq;": u"\u2acc\ufe00",
- "vartheta;": u"\u03d1",
- "vartriangleleft;": u"\u22b2",
- "vartriangleright;": u"\u22b3",
- "vcy;": u"\u0432",
- "vdash;": u"\u22a2",
- "vee;": u"\u2228",
- "veebar;": u"\u22bb",
- "veeeq;": u"\u225a",
- "vellip;": u"\u22ee",
- "verbar;": u"|",
- "vert;": u"|",
- "vfr;": u"\U0001d533",
- "vltri;": u"\u22b2",
- "vnsub;": u"\u2282\u20d2",
- "vnsup;": u"\u2283\u20d2",
- "vopf;": u"\U0001d567",
- "vprop;": u"\u221d",
- "vrtri;": u"\u22b3",
- "vscr;": u"\U0001d4cb",
- "vsubnE;": u"\u2acb\ufe00",
- "vsubne;": u"\u228a\ufe00",
- "vsupnE;": u"\u2acc\ufe00",
- "vsupne;": u"\u228b\ufe00",
- "vzigzag;": u"\u299a",
- "wcirc;": u"\u0175",
- "wedbar;": u"\u2a5f",
- "wedge;": u"\u2227",
- "wedgeq;": u"\u2259",
- "weierp;": u"\u2118",
- "wfr;": u"\U0001d534",
- "wopf;": u"\U0001d568",
- "wp;": u"\u2118",
- "wr;": u"\u2240",
- "wreath;": u"\u2240",
- "wscr;": u"\U0001d4cc",
- "xcap;": u"\u22c2",
- "xcirc;": u"\u25ef",
- "xcup;": u"\u22c3",
- "xdtri;": u"\u25bd",
- "xfr;": u"\U0001d535",
- "xhArr;": u"\u27fa",
- "xharr;": u"\u27f7",
- "xi;": u"\u03be",
- "xlArr;": u"\u27f8",
- "xlarr;": u"\u27f5",
- "xmap;": u"\u27fc",
- "xnis;": u"\u22fb",
- "xodot;": u"\u2a00",
- "xopf;": u"\U0001d569",
- "xoplus;": u"\u2a01",
- "xotime;": u"\u2a02",
- "xrArr;": u"\u27f9",
- "xrarr;": u"\u27f6",
- "xscr;": u"\U0001d4cd",
- "xsqcup;": u"\u2a06",
- "xuplus;": u"\u2a04",
- "xutri;": u"\u25b3",
- "xvee;": u"\u22c1",
- "xwedge;": u"\u22c0",
- "yacute": u"\xfd",
- "yacute;": u"\xfd",
- "yacy;": u"\u044f",
- "ycirc;": u"\u0177",
- "ycy;": u"\u044b",
- "yen": u"\xa5",
- "yen;": u"\xa5",
- "yfr;": u"\U0001d536",
- "yicy;": u"\u0457",
- "yopf;": u"\U0001d56a",
- "yscr;": u"\U0001d4ce",
- "yucy;": u"\u044e",
- "yuml": u"\xff",
- "yuml;": u"\xff",
- "zacute;": u"\u017a",
- "zcaron;": u"\u017e",
- "zcy;": u"\u0437",
- "zdot;": u"\u017c",
- "zeetrf;": u"\u2128",
- "zeta;": u"\u03b6",
- "zfr;": u"\U0001d537",
- "zhcy;": u"\u0436",
- "zigrarr;": u"\u21dd",
- "zopf;": u"\U0001d56b",
- "zscr;": u"\U0001d4cf",
- "zwj;": u"\u200d",
- "zwnj;": u"\u200c",
-}
-
-replacementCharacters = {
- 0x0:u"\uFFFD",
- 0x0d:u"\u000D",
- 0x80:u"\u20AC",
- 0x81:u"\u0081",
- 0x81:u"\u0081",
- 0x82:u"\u201A",
- 0x83:u"\u0192",
- 0x84:u"\u201E",
- 0x85:u"\u2026",
- 0x86:u"\u2020",
- 0x87:u"\u2021",
- 0x88:u"\u02C6",
- 0x89:u"\u2030",
- 0x8A:u"\u0160",
- 0x8B:u"\u2039",
- 0x8C:u"\u0152",
- 0x8D:u"\u008D",
- 0x8E:u"\u017D",
- 0x8F:u"\u008F",
- 0x90:u"\u0090",
- 0x91:u"\u2018",
- 0x92:u"\u2019",
- 0x93:u"\u201C",
- 0x94:u"\u201D",
- 0x95:u"\u2022",
- 0x96:u"\u2013",
- 0x97:u"\u2014",
- 0x98:u"\u02DC",
- 0x99:u"\u2122",
- 0x9A:u"\u0161",
- 0x9B:u"\u203A",
- 0x9C:u"\u0153",
- 0x9D:u"\u009D",
- 0x9E:u"\u017E",
- 0x9F:u"\u0178",
-}
-
-encodings = {
- '437': 'cp437',
- '850': 'cp850',
- '852': 'cp852',
- '855': 'cp855',
- '857': 'cp857',
- '860': 'cp860',
- '861': 'cp861',
- '862': 'cp862',
- '863': 'cp863',
- '865': 'cp865',
- '866': 'cp866',
- '869': 'cp869',
- 'ansix341968': 'ascii',
- 'ansix341986': 'ascii',
- 'arabic': 'iso8859-6',
- 'ascii': 'ascii',
- 'asmo708': 'iso8859-6',
- 'big5': 'big5',
- 'big5hkscs': 'big5hkscs',
- 'chinese': 'gbk',
- 'cp037': 'cp037',
- 'cp1026': 'cp1026',
- 'cp154': 'ptcp154',
- 'cp367': 'ascii',
- 'cp424': 'cp424',
- 'cp437': 'cp437',
- 'cp500': 'cp500',
- 'cp775': 'cp775',
- 'cp819': 'windows-1252',
- 'cp850': 'cp850',
- 'cp852': 'cp852',
- 'cp855': 'cp855',
- 'cp857': 'cp857',
- 'cp860': 'cp860',
- 'cp861': 'cp861',
- 'cp862': 'cp862',
- 'cp863': 'cp863',
- 'cp864': 'cp864',
- 'cp865': 'cp865',
- 'cp866': 'cp866',
- 'cp869': 'cp869',
- 'cp936': 'gbk',
- 'cpgr': 'cp869',
- 'cpis': 'cp861',
- 'csascii': 'ascii',
- 'csbig5': 'big5',
- 'cseuckr': 'cp949',
- 'cseucpkdfmtjapanese': 'euc_jp',
- 'csgb2312': 'gbk',
- 'cshproman8': 'hp-roman8',
- 'csibm037': 'cp037',
- 'csibm1026': 'cp1026',
- 'csibm424': 'cp424',
- 'csibm500': 'cp500',
- 'csibm855': 'cp855',
- 'csibm857': 'cp857',
- 'csibm860': 'cp860',
- 'csibm861': 'cp861',
- 'csibm863': 'cp863',
- 'csibm864': 'cp864',
- 'csibm865': 'cp865',
- 'csibm866': 'cp866',
- 'csibm869': 'cp869',
- 'csiso2022jp': 'iso2022_jp',
- 'csiso2022jp2': 'iso2022_jp_2',
- 'csiso2022kr': 'iso2022_kr',
- 'csiso58gb231280': 'gbk',
- 'csisolatin1': 'windows-1252',
- 'csisolatin2': 'iso8859-2',
- 'csisolatin3': 'iso8859-3',
- 'csisolatin4': 'iso8859-4',
- 'csisolatin5': 'windows-1254',
- 'csisolatin6': 'iso8859-10',
- 'csisolatinarabic': 'iso8859-6',
- 'csisolatincyrillic': 'iso8859-5',
- 'csisolatingreek': 'iso8859-7',
- 'csisolatinhebrew': 'iso8859-8',
- 'cskoi8r': 'koi8-r',
- 'csksc56011987': 'cp949',
- 'cspc775baltic': 'cp775',
- 'cspc850multilingual': 'cp850',
- 'cspc862latinhebrew': 'cp862',
- 'cspc8codepage437': 'cp437',
- 'cspcp852': 'cp852',
- 'csptcp154': 'ptcp154',
- 'csshiftjis': 'shift_jis',
- 'csunicode11utf7': 'utf-7',
- 'cyrillic': 'iso8859-5',
- 'cyrillicasian': 'ptcp154',
- 'ebcdiccpbe': 'cp500',
- 'ebcdiccpca': 'cp037',
- 'ebcdiccpch': 'cp500',
- 'ebcdiccphe': 'cp424',
- 'ebcdiccpnl': 'cp037',
- 'ebcdiccpus': 'cp037',
- 'ebcdiccpwt': 'cp037',
- 'ecma114': 'iso8859-6',
- 'ecma118': 'iso8859-7',
- 'elot928': 'iso8859-7',
- 'eucjp': 'euc_jp',
- 'euckr': 'cp949',
- 'extendedunixcodepackedformatforjapanese': 'euc_jp',
- 'gb18030': 'gb18030',
- 'gb2312': 'gbk',
- 'gb231280': 'gbk',
- 'gbk': 'gbk',
- 'greek': 'iso8859-7',
- 'greek8': 'iso8859-7',
- 'hebrew': 'iso8859-8',
- 'hproman8': 'hp-roman8',
- 'hzgb2312': 'hz',
- 'ibm037': 'cp037',
- 'ibm1026': 'cp1026',
- 'ibm367': 'ascii',
- 'ibm424': 'cp424',
- 'ibm437': 'cp437',
- 'ibm500': 'cp500',
- 'ibm775': 'cp775',
- 'ibm819': 'windows-1252',
- 'ibm850': 'cp850',
- 'ibm852': 'cp852',
- 'ibm855': 'cp855',
- 'ibm857': 'cp857',
- 'ibm860': 'cp860',
- 'ibm861': 'cp861',
- 'ibm862': 'cp862',
- 'ibm863': 'cp863',
- 'ibm864': 'cp864',
- 'ibm865': 'cp865',
- 'ibm866': 'cp866',
- 'ibm869': 'cp869',
- 'iso2022jp': 'iso2022_jp',
- 'iso2022jp2': 'iso2022_jp_2',
- 'iso2022kr': 'iso2022_kr',
- 'iso646irv1991': 'ascii',
- 'iso646us': 'ascii',
- 'iso88591': 'windows-1252',
- 'iso885910': 'iso8859-10',
- 'iso8859101992': 'iso8859-10',
- 'iso885911987': 'windows-1252',
- 'iso885913': 'iso8859-13',
- 'iso885914': 'iso8859-14',
- 'iso8859141998': 'iso8859-14',
- 'iso885915': 'iso8859-15',
- 'iso885916': 'iso8859-16',
- 'iso8859162001': 'iso8859-16',
- 'iso88592': 'iso8859-2',
- 'iso885921987': 'iso8859-2',
- 'iso88593': 'iso8859-3',
- 'iso885931988': 'iso8859-3',
- 'iso88594': 'iso8859-4',
- 'iso885941988': 'iso8859-4',
- 'iso88595': 'iso8859-5',
- 'iso885951988': 'iso8859-5',
- 'iso88596': 'iso8859-6',
- 'iso885961987': 'iso8859-6',
- 'iso88597': 'iso8859-7',
- 'iso885971987': 'iso8859-7',
- 'iso88598': 'iso8859-8',
- 'iso885981988': 'iso8859-8',
- 'iso88599': 'windows-1254',
- 'iso885991989': 'windows-1254',
- 'isoceltic': 'iso8859-14',
- 'isoir100': 'windows-1252',
- 'isoir101': 'iso8859-2',
- 'isoir109': 'iso8859-3',
- 'isoir110': 'iso8859-4',
- 'isoir126': 'iso8859-7',
- 'isoir127': 'iso8859-6',
- 'isoir138': 'iso8859-8',
- 'isoir144': 'iso8859-5',
- 'isoir148': 'windows-1254',
- 'isoir149': 'cp949',
- 'isoir157': 'iso8859-10',
- 'isoir199': 'iso8859-14',
- 'isoir226': 'iso8859-16',
- 'isoir58': 'gbk',
- 'isoir6': 'ascii',
- 'koi8r': 'koi8-r',
- 'koi8u': 'koi8-u',
- 'korean': 'cp949',
- 'ksc5601': 'cp949',
- 'ksc56011987': 'cp949',
- 'ksc56011989': 'cp949',
- 'l1': 'windows-1252',
- 'l10': 'iso8859-16',
- 'l2': 'iso8859-2',
- 'l3': 'iso8859-3',
- 'l4': 'iso8859-4',
- 'l5': 'windows-1254',
- 'l6': 'iso8859-10',
- 'l8': 'iso8859-14',
- 'latin1': 'windows-1252',
- 'latin10': 'iso8859-16',
- 'latin2': 'iso8859-2',
- 'latin3': 'iso8859-3',
- 'latin4': 'iso8859-4',
- 'latin5': 'windows-1254',
- 'latin6': 'iso8859-10',
- 'latin8': 'iso8859-14',
- 'latin9': 'iso8859-15',
- 'ms936': 'gbk',
- 'mskanji': 'shift_jis',
- 'pt154': 'ptcp154',
- 'ptcp154': 'ptcp154',
- 'r8': 'hp-roman8',
- 'roman8': 'hp-roman8',
- 'shiftjis': 'shift_jis',
- 'tis620': 'cp874',
- 'unicode11utf7': 'utf-7',
- 'us': 'ascii',
- 'usascii': 'ascii',
- 'utf16': 'utf-16',
- 'utf16be': 'utf-16-be',
- 'utf16le': 'utf-16-le',
- 'utf8': 'utf-8',
- 'windows1250': 'cp1250',
- 'windows1251': 'cp1251',
- 'windows1252': 'cp1252',
- 'windows1253': 'cp1253',
- 'windows1254': 'cp1254',
- 'windows1255': 'cp1255',
- 'windows1256': 'cp1256',
- 'windows1257': 'cp1257',
- 'windows1258': 'cp1258',
- 'windows936': 'gbk',
- 'x-x-big5': 'big5'}
-
-tokenTypes = {
- "Doctype":0,
- "Characters":1,
- "SpaceCharacters":2,
- "StartTag":3,
- "EndTag":4,
- "EmptyTag":5,
- "Comment":6,
- "ParseError":7
-}
-
-tagTokenTypes = frozenset((tokenTypes["StartTag"], tokenTypes["EndTag"],
- tokenTypes["EmptyTag"]))
-
-
-prefixes = dict([(v,k) for k,v in namespaces.iteritems()])
-prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
-
-class DataLossWarning(UserWarning):
- pass
-
-class ReparseException(Exception):
- pass
diff --git a/html5lib/filters/formfiller.py b/html5lib/filters/formfiller.py
deleted file mode 100644
index 94001714..00000000
--- a/html5lib/filters/formfiller.py
+++ /dev/null
@@ -1,127 +0,0 @@
-#
-# The goal is to finally have a form filler where you pass data for
-# each form, using the algorithm for "Seeding a form with initial values"
-# See http://www.whatwg.org/specs/web-forms/current-work/#seeding
-#
-
-import _base
-
-from html5lib.constants import spaceCharacters
-spaceCharacters = u"".join(spaceCharacters)
-
-class SimpleFilter(_base.Filter):
- def __init__(self, source, fieldStorage):
- _base.Filter.__init__(self, source)
- self.fieldStorage = fieldStorage
-
- def __iter__(self):
- field_indices = {}
- state = None
- field_name = None
- for token in _base.Filter.__iter__(self):
- type = token["type"]
- if type in ("StartTag", "EmptyTag"):
- name = token["name"].lower()
- if name == "input":
- field_name = None
- field_type = None
- input_value_index = -1
- input_checked_index = -1
- for i,(n,v) in enumerate(token["data"]):
- n = n.lower()
- if n == u"name":
- field_name = v.strip(spaceCharacters)
- elif n == u"type":
- field_type = v.strip(spaceCharacters)
- elif n == u"checked":
- input_checked_index = i
- elif n == u"value":
- input_value_index = i
-
- value_list = self.fieldStorage.getlist(field_name)
- field_index = field_indices.setdefault(field_name, 0)
- if field_index < len(value_list):
- value = value_list[field_index]
- else:
- value = ""
-
- if field_type in (u"checkbox", u"radio"):
- if value_list:
- if token["data"][input_value_index][1] == value:
- if input_checked_index < 0:
- token["data"].append((u"checked", u""))
- field_indices[field_name] = field_index + 1
- elif input_checked_index >= 0:
- del token["data"][input_checked_index]
-
- elif field_type not in (u"button", u"submit", u"reset"):
- if input_value_index >= 0:
- token["data"][input_value_index] = (u"value", value)
- else:
- token["data"].append((u"value", value))
- field_indices[field_name] = field_index + 1
-
- field_type = None
- field_name = None
-
- elif name == "textarea":
- field_type = "textarea"
- field_name = dict((token["data"])[::-1])["name"]
-
- elif name == "select":
- field_type = "select"
- attributes = dict(token["data"][::-1])
- field_name = attributes.get("name")
- is_select_multiple = "multiple" in attributes
- is_selected_option_found = False
-
- elif field_type == "select" and field_name and name == "option":
- option_selected_index = -1
- option_value = None
- for i,(n,v) in enumerate(token["data"]):
- n = n.lower()
- if n == "selected":
- option_selected_index = i
- elif n == "value":
- option_value = v.strip(spaceCharacters)
- if option_value is None:
- raise NotImplementedError("s without a value= attribute")
- else:
- value_list = self.fieldStorage.getlist(field_name)
- if value_list:
- field_index = field_indices.setdefault(field_name, 0)
- if field_index < len(value_list):
- value = value_list[field_index]
- else:
- value = ""
- if (is_select_multiple or not is_selected_option_found) and option_value == value:
- if option_selected_index < 0:
- token["data"].append((u"selected", u""))
- field_indices[field_name] = field_index + 1
- is_selected_option_found = True
- elif option_selected_index >= 0:
- del token["data"][option_selected_index]
-
- elif field_type is not None and field_name and type == "EndTag":
- name = token["name"].lower()
- if name == field_type:
- if name == "textarea":
- value_list = self.fieldStorage.getlist(field_name)
- if value_list:
- field_index = field_indices.setdefault(field_name, 0)
- if field_index < len(value_list):
- value = value_list[field_index]
- else:
- value = ""
- yield {"type": "Characters", "data": value}
- field_indices[field_name] = field_index + 1
-
- field_name = None
-
- elif name == "option" and field_type == "select":
- pass # TODO: part of "option without value= attribute" processing
-
- elif field_type == "textarea":
- continue # ignore token
-
- yield token
diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py
deleted file mode 100644
index 8e04d8ac..00000000
--- a/html5lib/filters/inject_meta_charset.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import _base
-
-class Filter(_base.Filter):
- def __init__(self, source, encoding):
- _base.Filter.__init__(self, source)
- self.encoding = encoding
-
- def __iter__(self):
- state = "pre_head"
- meta_found = (self.encoding is None)
- pending = []
-
- for token in _base.Filter.__iter__(self):
- type = token["type"]
- if type == "StartTag":
- if token["name"].lower() == u"head":
- state = "in_head"
-
- elif type == "EmptyTag":
- if token["name"].lower() == u"meta":
- # replace charset with actual encoding
- has_http_equiv_content_type = False
- for (namespace,name),value in token["data"].iteritems():
- if namespace != None:
- continue
- elif name.lower() == u'charset':
- token["data"][(namespace,name)] = self.encoding
- meta_found = True
- break
- elif name == u'http-equiv' and value.lower() == u'content-type':
- has_http_equiv_content_type = True
- else:
- if has_http_equiv_content_type and (None, u"content") in token["data"]:
- token["data"][(None, u"content")] = u'text/html; charset=%s' % self.encoding
- meta_found = True
-
- elif token["name"].lower() == u"head" and not meta_found:
- # insert meta into empty head
- yield {"type": "StartTag", "name": u"head",
- "data": token["data"]}
- yield {"type": "EmptyTag", "name": u"meta",
- "data": {(None, u"charset"): self.encoding}}
- yield {"type": "EndTag", "name": u"head"}
- meta_found = True
- continue
-
- elif type == "EndTag":
- if token["name"].lower() == u"head" and pending:
- # insert meta into head (if necessary) and flush pending queue
- yield pending.pop(0)
- if not meta_found:
- yield {"type": "EmptyTag", "name": u"meta",
- "data": {(None, u"charset"): self.encoding}}
- while pending:
- yield pending.pop(0)
- meta_found = True
- state = "post_head"
-
- if state == "in_head":
- pending.append(token)
- else:
- yield token
diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py
deleted file mode 100644
index 00235278..00000000
--- a/html5lib/filters/sanitizer.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import _base
-from html5lib.sanitizer import HTMLSanitizerMixin
-
-class Filter(_base.Filter, HTMLSanitizerMixin):
- def __iter__(self):
- for token in _base.Filter.__iter__(self):
- token = self.sanitize_token(token)
- if token: yield token
diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
deleted file mode 100644
index dd785639..00000000
--- a/html5lib/ihatexml.py
+++ /dev/null
@@ -1,177 +0,0 @@
-import re
-
-baseChar = """[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | [#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | [#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | [#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | 
[#x0E32-#x0E33] | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | #x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | [#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | #x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | [#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | [#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | [#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | #x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | [#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]"""
-
-ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]"""
-
-combiningCharacter = """[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | #x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | #x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | #x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | #x3099 | #x309A"""
-
-digit = """[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]"""
-
-extender = """#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]"""
-
-letter = " | ".join([baseChar, ideographic])
-
-#Without the
-name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter,
- extender])
-nameFirst = " | ".join([letter, "_"])
-
-reChar = re.compile(r"#x([\d|A-F]{4,4})")
-reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]")
-
-def charStringToList(chars):
- charRanges = [item.strip() for item in chars.split(" | ")]
- rv = []
- for item in charRanges:
- foundMatch = False
- for regexp in (reChar, reCharRange):
- match = regexp.match(item)
- if match is not None:
- rv.append([hexToInt(item) for item in match.groups()])
- if len(rv[-1]) == 1:
- rv[-1] = rv[-1]*2
- foundMatch = True
- break
- if not foundMatch:
- assert len(item) == 1
-
- rv.append([ord(item)] * 2)
- rv = normaliseCharList(rv)
- return rv
-
-def normaliseCharList(charList):
- charList = sorted(charList)
- for item in charList:
- assert item[1] >= item[0]
- rv = []
- i = 0
- while i < len(charList):
- j = 1
- rv.append(charList[i])
- while i + j < len(charList) and charList[i+j][0] <= rv[-1][1] + 1:
- rv[-1][1] = charList[i+j][1]
- j += 1
- i += j
- return rv
-
-#We don't really support characters above the BMP :(
-max_unicode = int("FFFF", 16)
-
-def missingRanges(charList):
- rv = []
- if charList[0] != 0:
- rv.append([0, charList[0][0] - 1])
- for i, item in enumerate(charList[:-1]):
- rv.append([item[1]+1, charList[i+1][0] - 1])
- if charList[-1][1] != max_unicode:
- rv.append([charList[-1][1] + 1, max_unicode])
- return rv
-
-def listToRegexpStr(charList):
- rv = []
- for item in charList:
- if item[0] == item[1]:
- rv.append(escapeRegexp(unichr(item[0])))
- else:
- rv.append(escapeRegexp(unichr(item[0])) + "-" +
- escapeRegexp(unichr(item[1])))
- return "[%s]"%"".join(rv)
-
-def hexToInt(hex_str):
- return int(hex_str, 16)
-
-def escapeRegexp(string):
- specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}",
- "[", "]", "|", "(", ")", "-")
- for char in specialCharacters:
- string = string.replace(char, "\\" + char)
- if char in string:
- print string
-
- return string
-
-#output from the above
-nonXmlNameBMPRegexp = re.compile(u'[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\
u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
-
-nonXmlNameFirstBMPRegexp = re.compile(u'[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\
u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]')
-
-class InfosetFilter(object):
- replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
- def __init__(self, replaceChars = None,
- dropXmlnsLocalName = False,
- dropXmlnsAttrNs = False,
- preventDoubleDashComments = False,
- preventDashAtCommentEnd = False,
- replaceFormFeedCharacters = True):
-
- self.dropXmlnsLocalName = dropXmlnsLocalName
- self.dropXmlnsAttrNs = dropXmlnsAttrNs
-
- self.preventDoubleDashComments = preventDoubleDashComments
- self.preventDashAtCommentEnd = preventDashAtCommentEnd
-
- self.replaceFormFeedCharacters = replaceFormFeedCharacters
-
- self.replaceCache = {}
-
- def coerceAttribute(self, name, namespace=None):
- if self.dropXmlnsLocalName and name.startswith("xmlns:"):
- #Need a datalosswarning here
- return None
- elif (self.dropXmlnsAttrNs and
- namespace == "http://www.w3.org/2000/xmlns/"):
- return None
- else:
- return self.toXmlName(name)
-
- def coerceElement(self, name, namespace=None):
- return self.toXmlName(name)
-
- def coerceComment(self, data):
- if self.preventDoubleDashComments:
- while "--" in data:
- data = data.replace("--", "- -")
- return data
-
- def coerceCharacters(self, data):
- if self.replaceFormFeedCharacters:
- data = data.replace("\x0C", " ")
- #Other non-xml characters
- return data
-
- def toXmlName(self, name):
- nameFirst = name[0]
- nameRest = name[1:]
- m = nonXmlNameFirstBMPRegexp.match(nameFirst)
- if m:
- nameFirstOutput = self.getReplacementCharacter(nameFirst)
- else:
- nameFirstOutput = nameFirst
-
- nameRestOutput = nameRest
- replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
- for char in replaceChars:
- replacement = self.getReplacementCharacter(char)
- nameRestOutput = nameRestOutput.replace(char, replacement)
- return nameFirstOutput + nameRestOutput
-
- def getReplacementCharacter(self, char):
- if char in self.replaceCache:
- replacement = self.replaceCache[char]
- else:
- replacement = self.escapeChar(char)
- return replacement
-
- def fromXmlName(self, name):
- for item in set(self.replacementRegexp.findall(name)):
- name = name.replace(item, self.unescapeChar(item))
- return name
-
- def escapeChar(self, char):
- replacement = "U" + hex(ord(char))[2:].upper().rjust(5, "0")
- self.replaceCache[char] = replacement
- return replacement
-
- def unescapeChar(self, charcode):
- return unichr(int(charcode[1:], 16))
diff --git a/html5lib/sanitizer.py b/html5lib/sanitizer.py
deleted file mode 100644
index ae4c7d83..00000000
--- a/html5lib/sanitizer.py
+++ /dev/null
@@ -1,258 +0,0 @@
-import re
-from xml.sax.saxutils import escape, unescape
-
-from tokenizer import HTMLTokenizer
-from constants import tokenTypes
-
-class HTMLSanitizerMixin(object):
- """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
-
- acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area',
- 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button',
- 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup',
- 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn',
- 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset',
- 'figcaption', 'figure', 'footer', 'font', 'form', 'header', 'h1',
- 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins',
- 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter',
- 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option',
- 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select',
- 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
- 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
- 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
-
- mathml_elements = ['maction', 'math', 'merror', 'mfrac', 'mi',
- 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom',
- 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub',
- 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder',
- 'munderover', 'none']
-
- svg_elements = ['a', 'animate', 'animateColor', 'animateMotion',
- 'animateTransform', 'clipPath', 'circle', 'defs', 'desc', 'ellipse',
- 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern',
- 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph',
- 'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect',
- 'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use']
-
- acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey',
- 'action', 'align', 'alt', 'autocomplete', 'autofocus', 'axis',
- 'background', 'balance', 'bgcolor', 'bgproperties', 'border',
- 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding',
- 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff',
- 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color',
- 'cols', 'colspan', 'compact', 'contenteditable', 'controls', 'coords',
- 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default',
- 'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end',
- 'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers',
- 'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace',
- 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing',
- 'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend',
- 'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method',
- 'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open',
- 'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg',
- 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min',
- 'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan',
- 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start',
- 'step', 'style', 'summary', 'suppress', 'tabindex', 'target',
- 'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap',
- 'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml',
- 'width', 'wrap', 'xml:lang']
-
- mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign',
- 'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth',
- 'display', 'displaystyle', 'equalcolumns', 'equalrows', 'fence',
- 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace',
- 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize',
- 'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines',
- 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection',
- 'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show',
- 'xlink:type', 'xmlns', 'xmlns:xlink']
-
- svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic',
- 'arabic-form', 'ascent', 'attributeName', 'attributeType',
- 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height',
- 'class', 'clip-path', 'color', 'color-rendering', 'content', 'cx',
- 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill',
- 'fill-opacity', 'fill-rule', 'font-family', 'font-size',
- 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from',
- 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging',
- 'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k',
- 'keyPoints', 'keySplines', 'keyTimes', 'lang', 'marker-end',
- 'marker-mid', 'marker-start', 'markerHeight', 'markerUnits',
- 'markerWidth', 'mathematical', 'max', 'min', 'name', 'offset',
- 'opacity', 'orient', 'origin', 'overline-position',
- 'overline-thickness', 'panose-1', 'path', 'pathLength', 'points',
- 'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount',
- 'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart',
- 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color',
- 'stop-opacity', 'strikethrough-position', 'strikethrough-thickness',
- 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap',
- 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity',
- 'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to',
- 'transform', 'type', 'u1', 'u2', 'underline-position',
- 'underline-thickness', 'unicode', 'unicode-range', 'units-per-em',
- 'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x',
- 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole',
- 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type',
- 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y',
- 'y1', 'y2', 'zoomAndPan']
-
- attr_val_is_uri = ['href', 'src', 'cite', 'action', 'longdesc',
- 'xlink:href', 'xml:base']
-
- svg_attr_val_allows_ref = ['clip-path', 'color-profile', 'cursor', 'fill',
- 'filter', 'marker', 'marker-start', 'marker-mid', 'marker-end',
- 'mask', 'stroke']
-
- svg_allow_local_href = ['altGlyph', 'animate', 'animateColor',
- 'animateMotion', 'animateTransform', 'cursor', 'feImage', 'filter',
- 'linearGradient', 'pattern', 'radialGradient', 'textpath', 'tref',
- 'set', 'use']
-
- acceptable_css_properties = ['azimuth', 'background-color',
- 'border-bottom-color', 'border-collapse', 'border-color',
- 'border-left-color', 'border-right-color', 'border-top-color', 'clear',
- 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font',
- 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight',
- 'height', 'letter-spacing', 'line-height', 'overflow', 'pause',
- 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness',
- 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation',
- 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent',
- 'unicode-bidi', 'vertical-align', 'voice-family', 'volume',
- 'white-space', 'width']
-
- acceptable_css_keywords = ['auto', 'aqua', 'black', 'block', 'blue',
- 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed',
- 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left',
- 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive',
- 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top',
- 'transparent', 'underline', 'white', 'yellow']
-
- acceptable_svg_properties = [ 'fill', 'fill-opacity', 'fill-rule',
- 'stroke', 'stroke-width', 'stroke-linecap', 'stroke-linejoin',
- 'stroke-opacity']
-
- acceptable_protocols = [ 'ed2k', 'ftp', 'http', 'https', 'irc',
- 'mailto', 'news', 'gopher', 'nntp', 'telnet', 'webcal',
- 'xmpp', 'callto', 'feed', 'urn', 'aim', 'rsync', 'tag',
- 'ssh', 'sftp', 'rtsp', 'afs' ]
-
- # subclasses may define their own versions of these constants
- allowed_elements = acceptable_elements + mathml_elements + svg_elements
- allowed_attributes = acceptable_attributes + mathml_attributes + svg_attributes
- allowed_css_properties = acceptable_css_properties
- allowed_css_keywords = acceptable_css_keywords
- allowed_svg_properties = acceptable_svg_properties
- allowed_protocols = acceptable_protocols
-
- # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
- # stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
- # attributes are parsed, and a restricted set, # specified by
- # ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
- # attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
- # in ALLOWED_PROTOCOLS are allowed.
- #
- # sanitize_html('')
- # => <script> do_nasty_stuff() </script>
- # sanitize_html('Click here for $100 ')
- # => Click here for $100
- def sanitize_token(self, token):
-
- # accommodate filters which use token_type differently
- token_type = token["type"]
- if token_type in tokenTypes.keys():
- token_type = tokenTypes[token_type]
-
- if token_type in (tokenTypes["StartTag"], tokenTypes["EndTag"],
- tokenTypes["EmptyTag"]):
- if token["name"] in self.allowed_elements:
- if token.has_key("data"):
- attrs = dict([(name,val) for name,val in
- token["data"][::-1]
- if name in self.allowed_attributes])
- for attr in self.attr_val_is_uri:
- if not attrs.has_key(attr):
- continue
- val_unescaped = re.sub("[`\000-\040\177-\240\s]+", '',
- unescape(attrs[attr])).lower()
- #remove replacement characters from unescaped characters
- val_unescaped = val_unescaped.replace(u"\ufffd", "")
- if (re.match("^[a-z0-9][-+.a-z0-9]*:",val_unescaped) and
- (val_unescaped.split(':')[0] not in
- self.allowed_protocols)):
- del attrs[attr]
- for attr in self.svg_attr_val_allows_ref:
- if attr in attrs:
- attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)',
- ' ',
- unescape(attrs[attr]))
- if (token["name"] in self.svg_allow_local_href and
- 'xlink:href' in attrs and re.search('^\s*[^#\s].*',
- attrs['xlink:href'])):
- del attrs['xlink:href']
- if attrs.has_key('style'):
- attrs['style'] = self.sanitize_css(attrs['style'])
- token["data"] = [[name,val] for name,val in attrs.items()]
- return token
- else:
- if token_type == tokenTypes["EndTag"]:
- token["data"] = "%s>" % token["name"]
- elif token["data"]:
- attrs = ''.join([' %s="%s"' % (k,escape(v)) for k,v in token["data"]])
- token["data"] = "<%s%s>" % (token["name"],attrs)
- else:
- token["data"] = "<%s>" % token["name"]
- if token.get("selfClosing"):
- token["data"]=token["data"][:-1] + "/>"
-
- if token["type"] in tokenTypes.keys():
- token["type"] = "Characters"
- else:
- token["type"] = tokenTypes["Characters"]
-
- del token["name"]
- return token
- elif token_type == tokenTypes["Comment"]:
- pass
- else:
- return token
-
- def sanitize_css(self, style):
- # disallow urls
- style=re.compile('url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ',style)
-
- # gauntlet
- if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return ''
- if not re.match("^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): return ''
-
- clean = []
- for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style):
- if not value: continue
- if prop.lower() in self.allowed_css_properties:
- clean.append(prop + ': ' + value + ';')
- elif prop.split('-')[0].lower() in ['background','border','margin',
- 'padding']:
- for keyword in value.split():
- if not keyword in self.acceptable_css_keywords and \
- not re.match("^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$",keyword):
- break
- else:
- clean.append(prop + ': ' + value + ';')
- elif prop.lower() in self.allowed_svg_properties:
- clean.append(prop + ': ' + value + ';')
-
- return ' '.join(clean)
-
-class HTMLSanitizer(HTMLTokenizer, HTMLSanitizerMixin):
- def __init__(self, stream, encoding=None, parseMeta=True, useChardet=True,
- lowercaseElementName=False, lowercaseAttrName=False, parser=None):
- #Change case matching defaults as we only output lowercase html anyway
- #This solution doesn't seem ideal...
- HTMLTokenizer.__init__(self, stream, encoding, parseMeta, useChardet,
- lowercaseElementName, lowercaseAttrName, parser=parser)
-
- def __iter__(self):
- for token in HTMLTokenizer.__iter__(self):
- token = self.sanitize_token(token)
- if token:
- yield token
diff --git a/html5lib/serializer/__init__.py b/html5lib/serializer/__init__.py
deleted file mode 100644
index 1b746655..00000000
--- a/html5lib/serializer/__init__.py
+++ /dev/null
@@ -1,17 +0,0 @@
-
-from html5lib import treewalkers
-
-from htmlserializer import HTMLSerializer
-from xhtmlserializer import XHTMLSerializer
-
-def serialize(input, tree="simpletree", format="html", encoding=None,
- **serializer_opts):
- # XXX: Should we cache this?
- walker = treewalkers.getTreeWalker(tree)
- if format == "html":
- s = HTMLSerializer(**serializer_opts)
- elif format == "xhtml":
- s = XHTMLSerializer(**serializer_opts)
- else:
- raise ValueError, "type must be either html or xhtml"
- return s.render(walker(input), encoding)
diff --git a/html5lib/serializer/xhtmlserializer.py b/html5lib/serializer/xhtmlserializer.py
deleted file mode 100644
index 7fdce47b..00000000
--- a/html5lib/serializer/xhtmlserializer.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from htmlserializer import HTMLSerializer
-
-class XHTMLSerializer(HTMLSerializer):
- quote_attr_values = True
- minimize_boolean_attributes = False
- use_trailing_solidus = True
- escape_lt_in_attrs = True
- omit_optional_tags = False
- escape_rcdata = True
diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py
deleted file mode 100644
index a7e9c4ea..00000000
--- a/html5lib/tests/__init__.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import sys
-import os
-
-parent_path = os.path.abspath(os.path.join(os.path.split(__file__)[0], ".."))
-
-if not parent_path in sys.path:
- sys.path.insert(0, parent_path)
-del parent_path
-
-from runtests import buildTestSuite
-
-import support
diff --git a/html5lib/tests/mockParser.py b/html5lib/tests/mockParser.py
deleted file mode 100644
index 5f9092b2..00000000
--- a/html5lib/tests/mockParser.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import sys
-import os
-
-if __name__ == '__main__':
- #Allow us to import from the src directory
- os.chdir(os.path.split(os.path.abspath(__file__))[0])
- sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, "src")))
-
-from tokenizer import HTMLTokenizer
-
-class HTMLParser(object):
- """ Fake parser to test tokenizer output """
- def parse(self, stream, output=True):
- tokenizer = HTMLTokenizer(stream)
- for token in tokenizer:
- if output:
- print token
-
-if __name__ == "__main__":
- x = HTMLParser()
- if len(sys.argv) > 1:
- if len(sys.argv) > 2:
- import hotshot, hotshot.stats
- prof = hotshot.Profile('stats.prof')
- prof.runcall(x.parse, sys.argv[1], False)
- prof.close()
- stats = hotshot.stats.load('stats.prof')
- stats.strip_dirs()
- stats.sort_stats('time')
- stats.print_stats()
- else:
- x.parse(sys.argv[1])
- else:
- print """Usage: python mockParser.py filename [stats]
- If stats is specified the hotshots profiler will run and output the
- stats instead.
- """
diff --git a/html5lib/tests/runparsertests.py b/html5lib/tests/runparsertests.py
deleted file mode 100644
index e671f8d7..00000000
--- a/html5lib/tests/runparsertests.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import sys
-import os
-import glob
-import unittest
-
-#Allow us to import the parent module
-os.chdir(os.path.split(os.path.abspath(__file__))[0])
-sys.path.insert(0, os.path.abspath(os.curdir))
-sys.path.insert(0, os.path.abspath(os.pardir))
-sys.path.insert(0, os.path.join(os.path.abspath(os.pardir), "src"))
-
-def buildTestSuite():
- suite = unittest.TestSuite()
- for testcase in glob.glob('test_*.py'):
- if testcase in ("test_tokenizer.py", "test_parser.py", "test_parser2.py"):
- module = os.path.splitext(testcase)[0]
- suite.addTest(__import__(module).buildTestSuite())
- return suite
-
-def main():
- results = unittest.TextTestRunner().run(buildTestSuite())
- return results
-
-if __name__ == "__main__":
- results = main()
- if not results.wasSuccessful():
- sys.exit(1)
diff --git a/html5lib/tests/runtests.py b/html5lib/tests/runtests.py
deleted file mode 100644
index b8e35722..00000000
--- a/html5lib/tests/runtests.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import sys
-import os
-import glob
-import unittest
-
-def buildTestSuite():
- suite = unittest.TestSuite()
- for testcase in glob.glob('test_*.py'):
- module = os.path.splitext(testcase)[0]
- suite.addTest(__import__(module).buildTestSuite())
- return suite
-
-def main():
- results = unittest.TextTestRunner().run(buildTestSuite())
- return results
-
-if __name__ == "__main__":
- results = main()
- if not results.wasSuccessful():
- sys.exit(1)
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
deleted file mode 100644
index deaf2e25..00000000
--- a/html5lib/tests/support.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import os
-import sys
-import codecs
-import glob
-
-base_path = os.path.split(__file__)[0]
-
-if os.path.exists(os.path.join(base_path, 'testdata')):
- #release
- test_dir = os.path.join(base_path, 'testdata')
-else:
- #development
- test_dir = os.path.abspath(
- os.path.join(base_path,
- os.path.pardir, os.path.pardir,
- os.path.pardir, 'testdata'))
- assert os.path.exists(test_dir), "Test data not found"
- #import the development html5lib
- sys.path.insert(0, os.path.abspath(os.path.join(base_path,
- os.path.pardir,
- os.path.pardir)))
-
-import html5lib
-from html5lib import html5parser, treebuilders
-del base_path
-
-#Build a dict of avaliable trees
-treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
- "DOM":treebuilders.getTreeBuilder("dom")}
-
-#Try whatever etree implementations are avaliable from a list that are
-#"supposed" to work
-try:
- import xml.etree.ElementTree as ElementTree
- treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
-except ImportError:
- try:
- import elementtree.ElementTree as ElementTree
- treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
- except ImportError:
- pass
-
-try:
- import xml.etree.cElementTree as cElementTree
- treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
-except ImportError:
- try:
- import cElementTree
- treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
- except ImportError:
- pass
-
-try:
- import lxml.etree as lxml
- treeTypes['lxml'] = treebuilders.getTreeBuilder("etree", lxml, fullTree=True)
-except ImportError:
- pass
-
-try:
- import BeautifulSoup
- treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
-except ImportError:
- pass
-
-def html5lib_test_files(subdirectory, files='*.dat'):
- return glob.glob(os.path.join(test_dir,subdirectory,files))
-
-class DefaultDict(dict):
- def __init__(self, default, *args, **kwargs):
- self.default = default
- dict.__init__(self, *args, **kwargs)
-
- def __getitem__(self, key):
- return dict.get(self, key, self.default)
-
-class TestData(object):
- def __init__(self, filename, newTestHeading="data"):
- self.f = codecs.open(filename, encoding="utf8")
- self.newTestHeading = newTestHeading
-
- def __iter__(self):
- data = DefaultDict(None)
- key=None
- for line in self.f:
- heading = self.isSectionHeading(line)
- if heading:
- if data and heading == self.newTestHeading:
- #Remove trailing newline
- data[key] = data[key][:-1]
- yield self.normaliseOutput(data)
- data = DefaultDict(None)
- key = heading
- data[key]=""
- elif key is not None:
- data[key] += line
- if data:
- yield self.normaliseOutput(data)
-
- def isSectionHeading(self, line):
- """If the current heading is a test section heading return the heading,
- otherwise return False"""
- if line.startswith("#"):
- return line[1:].strip()
- else:
- return False
-
- def normaliseOutput(self, data):
- #Remove trailing newlines
- for key,value in data.iteritems():
- if value.endswith("\n"):
- data[key] = value[:-1]
- return data
-
-def convert(stripChars):
- def convertData(data):
- """convert the output of str(document) to the format used in the testcases"""
- data = data.split("\n")
- rv = []
- for line in data:
- if line.startswith("|"):
- rv.append(line[stripChars:])
- else:
- rv.append(line)
- return "\n".join(rv)
- return convertData
-
-convertExpected = convert(2)
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
deleted file mode 100644
index c8c63e84..00000000
--- a/html5lib/tests/test_encoding.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import os
-import unittest
-from support import html5lib_test_files, TestData, test_dir
-
-from html5lib import HTMLParser, inputstream
-
-import re, unittest
-
-class Html5EncodingTestCase(unittest.TestCase):
- def test_codec_name(self):
- self.assertEquals(inputstream.codecName("utf-8"), "utf-8")
- self.assertEquals(inputstream.codecName("utf8"), "utf-8")
- self.assertEquals(inputstream.codecName(" utf8 "), "utf-8")
- self.assertEquals(inputstream.codecName("ISO_8859--1"), "windows-1252")
-
-def buildTestSuite():
- for filename in html5lib_test_files("encoding"):
- test_name = os.path.basename(filename).replace('.dat',''). \
- replace('-','')
- tests = TestData(filename, "data")
- for idx, test in enumerate(tests):
- def encodingTest(self, data=test['data'],
- encoding=test['encoding']):
- p = HTMLParser()
- t = p.parse(data, useChardet=False)
-
- errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n"%
- (data, repr(encoding.lower()),
- repr(p.tokenizer.stream.charEncoding)))
- self.assertEquals(encoding.lower(),
- p.tokenizer.stream.charEncoding[0],
- errorMessage)
- setattr(Html5EncodingTestCase, 'test_%s_%d' % (test_name, idx+1),
- encodingTest)
-
- try:
- import chardet
- def test_chardet(self):
- data = open(os.path.join(test_dir, "encoding" , "chardet", "test_big5.txt")).read()
- encoding = inputstream.HTMLInputStream(data).charEncoding
- assert encoding[0].lower() == "big5"
- setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)
- except ImportError:
- print "chardet not found, skipping chardet tests"
-
-
- return unittest.defaultTestLoader.loadTestsFromName(__name__)
-
-def main():
- buildTestSuite()
- unittest.main()
-
-if __name__ == "__main__":
- main()
diff --git a/html5lib/tests/test_formfiller.py b/html5lib/tests/test_formfiller.py
deleted file mode 100644
index debc11b7..00000000
--- a/html5lib/tests/test_formfiller.py
+++ /dev/null
@@ -1,296 +0,0 @@
-import sys
-import unittest
-
-from html5lib.filters.formfiller import SimpleFilter
-
-class FieldStorage(dict):
- def getlist(self, name):
- l = self[name]
- if isinstance(l, list):
- return l
- elif isinstance(l, tuple) or hasattr(l, '__iter__'):
- return list(l)
- return [l]
-
-class TestCase(unittest.TestCase):
- def runTest(self, input, formdata, expected):
- try:
- output = list(SimpleFilter(input, formdata))
- except NotImplementedError, nie:
- # Amnesty for those that confess...
- print >>sys.stderr, "Not implemented:", str(nie)
- else:
- errorMsg = "\n".join(["\n\nInput:", str(input),
- "\nForm data:", str(formdata),
- "\nExpected:", str(expected),
- "\nReceived:", str(output)])
- self.assertEquals(output, expected, errorMsg)
-
- def testSingleTextInputWithValue(self):
- self.runTest(
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"quux")]}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"bar")]}])
-
- def testSingleTextInputWithoutValue(self):
- self.runTest(
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"text"), (u"name", u"foo")]}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"text"), (u"name", u"foo"), (u"value", u"bar")]}])
-
- def testSingleCheckbox(self):
- self.runTest(
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar")]}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}])
-
- def testSingleCheckboxShouldBeUnchecked(self):
- self.runTest(
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}])
-
- def testSingleCheckboxCheckedByDefault(self):
- self.runTest(
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"bar"), (u"checked", u"")]}])
-
- def testSingleCheckboxCheckedByDefaultShouldBeUnchecked(self):
- self.runTest(
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux"), (u"checked", u"")]}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"EmptyTag", "name": u"input",
- "data": [(u"type", u"checkbox"), (u"name", u"foo"), (u"value", u"quux")]}])
-
- def testSingleTextareaWithValue(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"textarea", "data": []}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
- {"type": u"Characters", "data": u"bar"},
- {"type": u"EndTag", "name": u"textarea", "data": []}])
-
- def testSingleTextareaWithoutValue(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
- {"type": u"EndTag", "name": u"textarea", "data": []}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"StartTag", "name": u"textarea", "data": [(u"name", u"foo")]},
- {"type": u"Characters", "data": u"bar"},
- {"type": u"EndTag", "name": u"textarea", "data": []}])
-
- def testSingleSelectWithValue(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}])
-
- def testSingleSelectWithValueShouldBeUnselected(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}],
- FieldStorage({"foo": "quux"}),
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}])
-
- def testSingleSelectWithoutValue(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": []},
- {"type": u"Characters", "data": u"bar"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"selected", u"")]},
- {"type": u"Characters", "data": u"bar"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}])
-
- def testSingleSelectWithoutValueShouldBeUnselected(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": []},
- {"type": u"Characters", "data": u"bar"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}],
- FieldStorage({"foo": "quux"}),
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": []},
- {"type": u"Characters", "data": u"bar"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}])
-
- def testSingleSelectTwoOptionsWithValue(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}])
-
- def testSingleSelectTwoOptionsWithValueShouldBeUnselected(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"baz")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}],
- FieldStorage({"foo": "quux"}),
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"baz")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}])
-
- def testSingleSelectTwoOptionsWithoutValue(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": []},
- {"type": u"Characters", "data": u"bar"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": []},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}],
- FieldStorage({"foo": "bar"}),
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"selected", u"")]},
- {"type": u"Characters", "data": u"bar"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": []},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}])
-
- def testSingleSelectTwoOptionsWithoutValueShouldBeUnselected(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": []},
- {"type": u"Characters", "data": u"bar"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": []},
- {"type": u"Characters", "data": u"baz"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}],
- FieldStorage({"foo": "quux"}),
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": []},
- {"type": u"Characters", "data": u"bar"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": []},
- {"type": u"Characters", "data": u"baz"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}])
-
- def testSingleSelectMultiple(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo"), (u"multiple", u"")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}],
- FieldStorage({"foo": ["bar", "quux"]}),
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo"), (u"multiple", u"")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux"), (u"selected", u"")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}])
-
- def testTwoSelect(self):
- self.runTest(
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []},
- {"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}],
- FieldStorage({"foo": ["bar", "quux"]}),
- [{"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar"), (u"selected", u"")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []},
- {"type": u"StartTag", "name": u"select", "data": [(u"name", u"foo")]},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"bar")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"StartTag", "name": u"option", "data": [(u"value", u"quux"), (u"selected", u"")]},
- {"type": u"Characters", "data": u"quux"},
- {"type": u"EndTag", "name": u"option", "data": []},
- {"type": u"EndTag", "name": u"select", "data": []}])
-
-def buildTestSuite():
- return unittest.defaultTestLoader.loadTestsFromName(__name__)
-
-def main():
- buildTestSuite()
- unittest.main()
-
-if __name__ == "__main__":
- main()
diff --git a/html5lib/tests/test_parser.py b/html5lib/tests/test_parser.py
deleted file mode 100644
index c6704faa..00000000
--- a/html5lib/tests/test_parser.py
+++ /dev/null
@@ -1,140 +0,0 @@
-import os
-import sys
-import traceback
-import StringIO
-import warnings
-import re
-
-warnings.simplefilter("error")
-
-from support import html5lib_test_files as data_files
-from support import TestData, convert, convertExpected
-import html5lib
-from html5lib import html5parser, treebuilders, constants
-
-treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
- "DOM":treebuilders.getTreeBuilder("dom")}
-
-#Try whatever etree implementations are avaliable from a list that are
-#"supposed" to work
-try:
- import xml.etree.ElementTree as ElementTree
- treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
-except ImportError:
- try:
- import elementtree.ElementTree as ElementTree
- treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
- except ImportError:
- pass
-
-try:
- import xml.etree.cElementTree as cElementTree
- treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
-except ImportError:
- try:
- import cElementTree
- treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
- except ImportError:
- pass
-
-try:
- try:
- import lxml.html as lxml
- except ImportError:
- import lxml.etree as lxml
- treeTypes['lxml'] = treebuilders.getTreeBuilder("lxml", lxml, fullTree=True)
-except ImportError:
- pass
-
-try:
- import BeautifulSoup
- treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
-except ImportError:
- pass
-
-#Try whatever dom implementations are avaliable from a list that are
-#"supposed" to work
-try:
- import pxdom
- treeTypes["pxdom"] = treebuilders.getTreeBuilder("dom", pxdom)
-except ImportError:
- pass
-
-#Run the parse error checks
-checkParseErrors = False
-
-#XXX - There should just be one function here but for some reason the testcase
-#format differs from the treedump format by a single space character
-def convertTreeDump(data):
- return "\n".join(convert(3)(data).split("\n")[1:])
-
-namespaceExpected = re.compile(r"^(\s*)<(\S+)>", re.M).sub
-
-
-def runParserTest(innerHTML, input, expected, errors, treeClass,
- namespaceHTMLElements):
- #XXX - move this out into the setup function
- #concatenate all consecutive character tokens into a single token
- try:
- p = html5parser.HTMLParser(tree = treeClass,
- namespaceHTMLElements=namespaceHTMLElements)
- except constants.DataLossWarning:
- return
-
- try:
- if innerHTML:
- document = p.parseFragment(input, innerHTML)
- else:
- try:
- document = p.parse(input)
- except constants.DataLossWarning:
- return
- except:
- errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
- u"\nTraceback:", traceback.format_exc()])
- assert False, errorMsg.encode("utf8")
-
- output = convertTreeDump(p.tree.testSerializer(document))
-
- expected = convertExpected(expected)
- if namespaceHTMLElements:
- expected = namespaceExpected(r"\1", expected)
-
- errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
- u"\nReceived:", output])
- assert expected == output, errorMsg.encode("utf8")
- errStr = [u"Line: %i Col: %i %s"%(line, col,
- constants.E[errorcode] % datavars if isinstance(datavars, dict) else (datavars,)) for
- ((line,col), errorcode, datavars) in p.errors]
-
- errorMsg2 = u"\n".join([u"\n\nInput:", input,
- u"\nExpected errors (" + str(len(errors)) + u"):\n" + u"\n".join(errors),
- u"\nActual errors (" + str(len(p.errors)) + u"):\n" + u"\n".join(errStr)])
- if checkParseErrors:
- assert len(p.errors) == len(errors), errorMsg2.encode("utf-8")
-
-def test_parser():
- sys.stderr.write('Testing tree builders '+ " ".join(treeTypes.keys()) + "\n")
- files = data_files('tree-construction')
-
- for filename in files:
- testName = os.path.basename(filename).replace(".dat","")
-
- tests = TestData(filename, "data")
-
- for index, test in enumerate(tests):
- input, errors, innerHTML, expected = [test[key] for key in
- 'data', 'errors',
- 'document-fragment',
- 'document']
- if errors:
- errors = errors.split("\n")
-
- for treeName, treeCls in treeTypes.iteritems():
- for namespaceHTMLElements in (True, False):
- print input
- yield (runParserTest, innerHTML, input, expected, errors, treeCls,
- namespaceHTMLElements)
- break
-
-
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
deleted file mode 100755
index 3e1c80c3..00000000
--- a/html5lib/tests/test_parser2.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import support
-from html5lib import html5parser
-from html5lib.constants import namespaces
-from html5lib.treebuilders import dom
-
-import unittest
-
-# tests that aren't autogenerated from text files
-class MoreParserTests(unittest.TestCase):
-
- def test_assertDoctypeCloneable(self):
- parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
- doc = parser.parse('')
- self.assert_(doc.cloneNode(True))
-
- def test_line_counter(self):
- # http://groups.google.com/group/html5lib-discuss/browse_frm/thread/f4f00e4a2f26d5c0
- parser = html5parser.HTMLParser(tree=dom.TreeBuilder)
- parser.parse("\nx\n>\n ")
-
- def test_namespace_html_elements_0(self):
- parser = html5parser.HTMLParser(namespaceHTMLElements=True)
- doc = parser.parse("")
- self.assert_(doc.childNodes[0].namespace == namespaces["html"])
-
- def test_namespace_html_elements_1(self):
- parser = html5parser.HTMLParser(namespaceHTMLElements=False)
- doc = parser.parse("")
- self.assert_(doc.childNodes[0].namespace == None)
-
-def buildTestSuite():
- return unittest.defaultTestLoader.loadTestsFromName(__name__)
-
-def main():
- buildTestSuite()
- unittest.main()
-
-if __name__ == '__main__':
- main()
diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
deleted file mode 100644
index aabed780..00000000
--- a/html5lib/tests/test_sanitizer.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import os
-import sys
-import unittest
-
-try:
- import json
-except ImportError:
- import simplejson as json
-
-from html5lib import html5parser, sanitizer, constants
-
-def runSanitizerTest(name, expected, input):
- expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
- parseFragment(expected).childNodes])
- expected = json.loads(json.dumps(expected))
- assert expected == sanitize_html(input)
-
-def sanitize_html(stream):
- return ''.join([token.toxml() for token in
- html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
- parseFragment(stream).childNodes])
-
-def test_should_handle_astral_plane_characters():
- assert u"\U0001d4b5 \U0001d538
" == sanitize_html("𝒵 𝔸
")
-
-def test_sanitizer():
- for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
- if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
- continue ### TODO
- if tag_name != tag_name.lower():
- continue ### TODO
- if tag_name == 'image':
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
- " foo <bad>bar</bad> baz",
- "<%s title='1'>foo bar baz%s>" % (tag_name,tag_name))
- elif tag_name == 'br':
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
- " foo <bad>bar</bad> baz ",
- "<%s title='1'>foo bar baz%s>" % (tag_name,tag_name))
- elif tag_name in constants.voidElements:
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
- "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name,
- "<%s title='1'>foo bar baz%s>" % (tag_name,tag_name))
- else:
- yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
- "<%s title=\"1\">foo <bad>bar</bad> baz%s>" % (tag_name,tag_name),
- "<%s title='1'>foo bar baz%s>" % (tag_name,tag_name))
-
- for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
- tag_name = tag_name.upper()
- yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
- "<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name),
- "<%s title='1'>foo bar baz%s>" % (tag_name,tag_name))
-
- for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
- if attribute_name != attribute_name.lower(): continue ### TODO
- if attribute_name == 'style': continue
- yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
- "foo <bad>bar</bad> baz
" % attribute_name,
- "foo bar baz
" % attribute_name)
-
- for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
- attribute_name = attribute_name.upper()
- yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
- "foo <bad>bar</bad> baz
",
- "foo bar baz
" % attribute_name)
-
- for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
- yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
- "foo " % protocol,
- """foo """ % protocol)
-
- for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
- yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
- "foo " % protocol,
- """foo """ % protocol)
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
deleted file mode 100644
index 132620b1..00000000
--- a/html5lib/tests/test_serializer.py
+++ /dev/null
@@ -1,180 +0,0 @@
-import os
-import unittest
-from support import html5lib_test_files
-
-try:
- import json
-except ImportError:
- import simplejson as json
-
-import html5lib
-from html5lib import html5parser, serializer, constants
-from html5lib.treewalkers._base import TreeWalker
-
-optionals_loaded = []
-
-try:
- from lxml import etree
- optionals_loaded.append("lxml")
-except ImportError:
- pass
-
-default_namespace = constants.namespaces["html"]
-
-class JsonWalker(TreeWalker):
- def __iter__(self):
- for token in self.tree:
- type = token[0]
- if type == "StartTag":
- if len(token) == 4:
- namespace, name, attrib = token[1:4]
- else:
- namespace = default_namespace
- name, attrib = token[1:3]
- yield self.startTag(namespace, name, self._convertAttrib(attrib))
- elif type == "EndTag":
- if len(token) == 3:
- namespace, name = token[1:3]
- else:
- namespace = default_namespace
- name = token[1]
- yield self.endTag(namespace, name)
- elif type == "EmptyTag":
- if len(token) == 4:
- namespace, name, attrib = token[1:]
- else:
- namespace = default_namespace
- name, attrib = token[1:]
- for token in self.emptyTag(namespace, name, self._convertAttrib(attrib)):
- yield token
- elif type == "Comment":
- yield self.comment(token[1])
- elif type in ("Characters", "SpaceCharacters"):
- for token in self.text(token[1]):
- yield token
- elif type == "Doctype":
- if len(token) == 4:
- yield self.doctype(token[1], token[2], token[3])
- elif len(token) == 3:
- yield self.doctype(token[1], token[2])
- else:
- yield self.doctype(token[1])
- else:
- raise ValueError("Unknown token type: " + type)
-
- def _convertAttrib(self, attribs):
- """html5lib tree-walkers use a dict of (namespace, name): value for
- attributes, but JSON cannot represent this. Convert from the format
- in the serializer tests (a list of dicts with "namespace", "name",
- and "value" as keys) to html5lib's tree-walker format."""
- attrs = {}
- for attrib in attribs:
- name = (attrib["namespace"], attrib["name"])
- assert(name not in attrs)
- attrs[name] = attrib["value"]
- return attrs
-
-
-def serialize_html(input, options):
- options = dict([(str(k),v) for k,v in options.iteritems()])
- return serializer.HTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
-
-def serialize_xhtml(input, options):
- options = dict([(str(k),v) for k,v in options.iteritems()])
- return serializer.XHTMLSerializer(**options).render(JsonWalker(input),options.get("encoding",None))
-
-def make_test(input, expected, xhtml, options):
- result = serialize_html(input, options)
- if len(expected) == 1:
- assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:False\n%s"%(expected[0], result, str(options))
- elif result not in expected:
- assert False, "Expected: %s, Received: %s" % (expected, result)
-
- if not xhtml:
- return
-
- result = serialize_xhtml(input, options)
- if len(xhtml) == 1:
- assert xhtml[0] == result, "Expected:\n%s\nActual:\n%s\nOptions\nxhtml:True\n%s"%(xhtml[0], result, str(options))
- elif result not in xhtml:
- assert False, "Expected: %s, Received: %s" % (xhtml, result)
-
-
-class EncodingTestCase(unittest.TestCase):
- def throwsWithLatin1(self, input):
- self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"})
-
- def testDoctypeName(self):
- self.throwsWithLatin1([["Doctype", u"\u0101"]])
-
- def testDoctypePublicId(self):
- self.throwsWithLatin1([["Doctype", u"potato", u"\u0101"]])
-
- def testDoctypeSystemId(self):
- self.throwsWithLatin1([["Doctype", u"potato", u"potato", u"\u0101"]])
-
- def testCdataCharacters(self):
- self.assertEquals("
-#encoding
-iso8859-2
-
-#data
-
-
-#encoding
-iso8859-2
-
-#data
-
-
-
-#encoding
-iso8859-2
diff --git a/html5lib/tests/testdata/encoding/tests2.dat b/html5lib/tests/testdata/encoding/tests2.dat
deleted file mode 100644
index eee44984..00000000
--- a/html5lib/tests/testdata/encoding/tests2.dat
+++ /dev/null
@@ -1,115 +0,0 @@
-#data
-
-#encoding
-utf-8
-
-#data
-
-
-#encoding
-windows-1252
-
-#data
-
-#encoding
-utf-8
-
-#data
-
-#encoding
-windows-1252
-
-#data
-
-#encoding
-utf-8
-
-#data
-
-#encoding
-utf-8
-
-#data
-
-#encoding
-utf-8
-
-#data
-
-#encoding
-utf-8
-
-#data
-
-
-#encoding
-utf-8
-
-#data
-
-
-#encoding
-utf-8
-
-#data
-ñ
-
-#encoding
-utf-8
diff --git a/html5lib/tests/testdata/sanitizer/tests1.dat b/html5lib/tests/testdata/sanitizer/tests1.dat
deleted file mode 100644
index c741cb8c..00000000
--- a/html5lib/tests/testdata/sanitizer/tests1.dat
+++ /dev/null
@@ -1,501 +0,0 @@
-[
- {
- "name": "IE_Comments",
- "input": "",
- "output": ""
- },
-
- {
- "name": "IE_Comments_2",
- "input": "",
- "output": "<script>alert('XSS');</script>",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "allow_colons_in_path_component",
- "input": "foo ",
- "output": "foo "
- },
-
- {
- "name": "background_attribute",
- "input": "
",
- "output": "
",
- "xhtml": "
",
- "rexml": "
"
- },
-
- {
- "name": "bgsound",
- "input": " ",
- "output": "<bgsound src=\"javascript:alert('XSS');\"/>",
- "rexml": "<bgsound src=\"javascript:alert('XSS');\"></bgsound>"
- },
-
- {
- "name": "div_background_image_unicode_encoded",
- "input": "foo
",
- "output": "foo
"
- },
-
- {
- "name": "div_expression",
- "input": "foo
",
- "output": "foo
"
- },
-
- {
- "name": "double_open_angle_brackets",
- "input": " ",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "double_open_angle_brackets_2",
- "input": "",
- "output": " ",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "img_dynsrc_lowsrc",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "img_vbscript",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "input_image",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "link_stylesheets",
- "input": " ",
- "output": "<link rel=\"stylesheet\" href=\"javascript:alert('XSS');\"/>",
- "rexml": "<link href=\"javascript:alert('XSS');\" rel=\"stylesheet\"/>"
- },
-
- {
- "name": "link_stylesheets_2",
- "input": " ",
- "output": "<link rel=\"stylesheet\" href=\"http://ha.ckers.org/xss.css\"/>",
- "rexml": "<link href=\"http://ha.ckers.org/xss.css\" rel=\"stylesheet\"/>"
- },
-
- {
- "name": "list_style_image",
- "input": "foo ",
- "output": "foo "
- },
-
- {
- "name": "no_closing_script_tags",
- "input": "",
- "output": "<script XSS=\"\" src=\"http://ha.ckers.org/xss.js\"></script>",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "non_alpha_non_digit_2",
- "input": "foo ",
- "output": "foo ",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "non_alpha_non_digit_3",
- "input": " ",
- "output": " ",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "non_alpha_non_digit_II",
- "input": "foo ",
- "output": "foo ",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "non_alpha_non_digit_III",
- "input": "foo ",
- "output": "foo ",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "platypus",
- "input": "never trust your upstream platypus ",
- "output": "never trust your upstream platypus "
- },
-
- {
- "name": "protocol_resolution_in_script_tag",
- "input": "",
- "output": "<script src=\"//ha.ckers.org/.j\"></script>",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "should_allow_anchors",
- "input": " ",
- "output": "<script>baz</script> "
- },
-
- {
- "name": "should_allow_image_alt_attribute",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_allow_image_height_attribute",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_allow_image_src_attribute",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_allow_image_width_attribute",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_handle_blank_text",
- "input": "",
- "output": ""
- },
-
- {
- "name": "should_handle_malformed_image_tags",
- "input": " \">",
- "output": " <script>alert(\"XSS\")</script>\">",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "should_handle_non_html",
- "input": "abc",
- "output": "abc"
- },
-
- {
- "name": "should_not_fall_for_ridiculous_hack",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_0",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_1",
- "input": " ",
- "output": " ",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_10",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_11",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_12",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_13",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_14",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_2",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_3",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_4",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_5",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_6",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_7",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_8",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_not_fall_for_xss_image_hack_9",
- "input": " ",
- "output": " ",
- "rexml": " "
- },
-
- {
- "name": "should_sanitize_half_open_scripts",
- "input": " ",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "should_sanitize_invalid_script_tag",
- "input": "",
- "output": "<script XSS=\"\" SRC=\"http://ha.ckers.org/xss.js\"></script>",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "should_sanitize_script_tag_with_multiple_open_brackets",
- "input": "<",
- "output": "<<script>alert(\"XSS\");//<</script>",
- "rexml": "Ill-formed XHTML!"
- },
-
- {
- "name": "should_sanitize_script_tag_with_multiple_open_brackets_2",
- "input": "
-#errors
-Line: 1 Col: 9 Unexpected end tag (strong). Expected DOCTYPE.
-Line: 1 Col: 9 Unexpected end tag (strong) after the (implied) root element.
-Line: 1 Col: 13 Unexpected end tag (b) after the (implied) root element.
-Line: 1 Col: 18 Unexpected end tag (em) after the (implied) root element.
-Line: 1 Col: 22 Unexpected end tag (i) after the (implied) root element.
-Line: 1 Col: 26 Unexpected end tag (u) after the (implied) root element.
-Line: 1 Col: 35 Unexpected end tag (strike) after the (implied) root element.
-Line: 1 Col: 39 Unexpected end tag (s) after the (implied) root element.
-Line: 1 Col: 47 Unexpected end tag (blink) after the (implied) root element.
-Line: 1 Col: 52 Unexpected end tag (tt) after the (implied) root element.
-Line: 1 Col: 58 Unexpected end tag (pre) after the (implied) root element.
-Line: 1 Col: 64 Unexpected end tag (big) after the (implied) root element.
-Line: 1 Col: 72 Unexpected end tag (small) after the (implied) root element.
-Line: 1 Col: 79 Unexpected end tag (font) after the (implied) root element.
-Line: 1 Col: 88 Unexpected end tag (select) after the (implied) root element.
-Line: 1 Col: 93 Unexpected end tag (h1) after the (implied) root element.
-Line: 1 Col: 98 Unexpected end tag (h2) after the (implied) root element.
-Line: 1 Col: 103 Unexpected end tag (h3) after the (implied) root element.
-Line: 1 Col: 108 Unexpected end tag (h4) after the (implied) root element.
-Line: 1 Col: 113 Unexpected end tag (h5) after the (implied) root element.
-Line: 1 Col: 118 Unexpected end tag (h6) after the (implied) root element.
-Line: 1 Col: 125 Unexpected end tag (body) after the (implied) root element.
-Line: 1 Col: 130 Unexpected end tag (br). Treated as br element.
-Line: 1 Col: 134 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 140 This element (img) has no end tag.
-Line: 1 Col: 148 Unexpected end tag (title). Ignored.
-Line: 1 Col: 155 Unexpected end tag (span). Ignored.
-Line: 1 Col: 163 Unexpected end tag (style). Ignored.
-Line: 1 Col: 172 Unexpected end tag (script). Ignored.
-Line: 1 Col: 180 Unexpected end tag (table). Ignored.
-Line: 1 Col: 185 Unexpected end tag (th). Ignored.
-Line: 1 Col: 190 Unexpected end tag (td). Ignored.
-Line: 1 Col: 195 Unexpected end tag (tr). Ignored.
-Line: 1 Col: 203 This element (frame) has no end tag.
-Line: 1 Col: 210 This element (area) has no end tag.
-Line: 1 Col: 217 Unexpected end tag (link). Ignored.
-Line: 1 Col: 225 This element (param) has no end tag.
-Line: 1 Col: 230 This element (hr) has no end tag.
-Line: 1 Col: 238 This element (input) has no end tag.
-Line: 1 Col: 244 Unexpected end tag (col). Ignored.
-Line: 1 Col: 251 Unexpected end tag (base). Ignored.
-Line: 1 Col: 258 Unexpected end tag (meta). Ignored.
-Line: 1 Col: 269 This element (basefont) has no end tag.
-Line: 1 Col: 279 This element (bgsound) has no end tag.
-Line: 1 Col: 287 This element (embed) has no end tag.
-Line: 1 Col: 296 This element (spacer) has no end tag.
-Line: 1 Col: 300 Unexpected end tag (p). Ignored.
-Line: 1 Col: 305 End tag (dd) seen too early. Expected other end tag.
-Line: 1 Col: 310 End tag (dt) seen too early. Expected other end tag.
-Line: 1 Col: 320 Unexpected end tag (caption). Ignored.
-Line: 1 Col: 331 Unexpected end tag (colgroup). Ignored.
-Line: 1 Col: 339 Unexpected end tag (tbody). Ignored.
-Line: 1 Col: 347 Unexpected end tag (tfoot). Ignored.
-Line: 1 Col: 355 Unexpected end tag (thead). Ignored.
-Line: 1 Col: 365 End tag (address) seen too early. Expected other end tag.
-Line: 1 Col: 378 End tag (blockquote) seen too early. Expected other end tag.
-Line: 1 Col: 387 End tag (center) seen too early. Expected other end tag.
-Line: 1 Col: 393 Unexpected end tag (dir). Ignored.
-Line: 1 Col: 399 End tag (div) seen too early. Expected other end tag.
-Line: 1 Col: 404 End tag (dl) seen too early. Expected other end tag.
-Line: 1 Col: 415 End tag (fieldset) seen too early. Expected other end tag.
-Line: 1 Col: 425 End tag (listing) seen too early. Expected other end tag.
-Line: 1 Col: 432 End tag (menu) seen too early. Expected other end tag.
-Line: 1 Col: 437 End tag (ol) seen too early. Expected other end tag.
-Line: 1 Col: 442 End tag (ul) seen too early. Expected other end tag.
-Line: 1 Col: 447 End tag (li) seen too early. Expected other end tag.
-Line: 1 Col: 454 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 460 This element (wbr) has no end tag.
-Line: 1 Col: 476 End tag (button) seen too early. Expected other end tag.
-Line: 1 Col: 486 End tag (marquee) seen too early. Expected other end tag.
-Line: 1 Col: 495 End tag (object) seen too early. Expected other end tag.
-Line: 1 Col: 513 Unexpected end tag (html). Ignored.
-Line: 1 Col: 513 Unexpected end tag (frameset). Ignored.
-Line: 1 Col: 520 Unexpected end tag (head). Ignored.
-Line: 1 Col: 529 Unexpected end tag (iframe). Ignored.
-Line: 1 Col: 537 This element (image) has no end tag.
-Line: 1 Col: 547 This element (isindex) has no end tag.
-Line: 1 Col: 557 Unexpected end tag (noembed). Ignored.
-Line: 1 Col: 568 Unexpected end tag (noframes). Ignored.
-Line: 1 Col: 579 Unexpected end tag (noscript). Ignored.
-Line: 1 Col: 590 Unexpected end tag (optgroup). Ignored.
-Line: 1 Col: 599 Unexpected end tag (option). Ignored.
-Line: 1 Col: 611 Unexpected end tag (plaintext). Ignored.
-Line: 1 Col: 622 Unexpected end tag (textarea). Ignored.
-#document
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-Line: 1 Col: 7 Unexpected start tag (table). Expected DOCTYPE.
-Line: 1 Col: 20 Unexpected end tag (strong) in table context caused voodoo mode.
-Line: 1 Col: 20 End tag (strong) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 24 Unexpected end tag (b) in table context caused voodoo mode.
-Line: 1 Col: 24 End tag (b) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 29 Unexpected end tag (em) in table context caused voodoo mode.
-Line: 1 Col: 29 End tag (em) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 33 Unexpected end tag (i) in table context caused voodoo mode.
-Line: 1 Col: 33 End tag (i) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 37 Unexpected end tag (u) in table context caused voodoo mode.
-Line: 1 Col: 37 End tag (u) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 46 Unexpected end tag (strike) in table context caused voodoo mode.
-Line: 1 Col: 46 End tag (strike) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 50 Unexpected end tag (s) in table context caused voodoo mode.
-Line: 1 Col: 50 End tag (s) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 58 Unexpected end tag (blink) in table context caused voodoo mode.
-Line: 1 Col: 58 Unexpected end tag (blink). Ignored.
-Line: 1 Col: 63 Unexpected end tag (tt) in table context caused voodoo mode.
-Line: 1 Col: 63 End tag (tt) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 69 Unexpected end tag (pre) in table context caused voodoo mode.
-Line: 1 Col: 69 End tag (pre) seen too early. Expected other end tag.
-Line: 1 Col: 75 Unexpected end tag (big) in table context caused voodoo mode.
-Line: 1 Col: 75 End tag (big) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 83 Unexpected end tag (small) in table context caused voodoo mode.
-Line: 1 Col: 83 End tag (small) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 90 Unexpected end tag (font) in table context caused voodoo mode.
-Line: 1 Col: 90 End tag (font) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 99 Unexpected end tag (select) in table context caused voodoo mode.
-Line: 1 Col: 99 Unexpected end tag (select). Ignored.
-Line: 1 Col: 104 Unexpected end tag (h1) in table context caused voodoo mode.
-Line: 1 Col: 104 End tag (h1) seen too early. Expected other end tag.
-Line: 1 Col: 109 Unexpected end tag (h2) in table context caused voodoo mode.
-Line: 1 Col: 109 End tag (h2) seen too early. Expected other end tag.
-Line: 1 Col: 114 Unexpected end tag (h3) in table context caused voodoo mode.
-Line: 1 Col: 114 End tag (h3) seen too early. Expected other end tag.
-Line: 1 Col: 119 Unexpected end tag (h4) in table context caused voodoo mode.
-Line: 1 Col: 119 End tag (h4) seen too early. Expected other end tag.
-Line: 1 Col: 124 Unexpected end tag (h5) in table context caused voodoo mode.
-Line: 1 Col: 124 End tag (h5) seen too early. Expected other end tag.
-Line: 1 Col: 129 Unexpected end tag (h6) in table context caused voodoo mode.
-Line: 1 Col: 129 End tag (h6) seen too early. Expected other end tag.
-Line: 1 Col: 136 Unexpected end tag (body) in the table row phase. Ignored.
-Line: 1 Col: 141 Unexpected end tag (br) in table context caused voodoo mode.
-Line: 1 Col: 141 Unexpected end tag (br). Treated as br element.
-Line: 1 Col: 145 Unexpected end tag (a) in table context caused voodoo mode.
-Line: 1 Col: 145 End tag (a) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 151 Unexpected end tag (img) in table context caused voodoo mode.
-Line: 1 Col: 151 This element (img) has no end tag.
-Line: 1 Col: 159 Unexpected end tag (title) in table context caused voodoo mode.
-Line: 1 Col: 159 Unexpected end tag (title). Ignored.
-Line: 1 Col: 166 Unexpected end tag (span) in table context caused voodoo mode.
-Line: 1 Col: 166 Unexpected end tag (span). Ignored.
-Line: 1 Col: 174 Unexpected end tag (style) in table context caused voodoo mode.
-Line: 1 Col: 174 Unexpected end tag (style). Ignored.
-Line: 1 Col: 183 Unexpected end tag (script) in table context caused voodoo mode.
-Line: 1 Col: 183 Unexpected end tag (script). Ignored.
-Line: 1 Col: 196 Unexpected end tag (th). Ignored.
-Line: 1 Col: 201 Unexpected end tag (td). Ignored.
-Line: 1 Col: 206 Unexpected end tag (tr). Ignored.
-Line: 1 Col: 214 This element (frame) has no end tag.
-Line: 1 Col: 221 This element (area) has no end tag.
-Line: 1 Col: 228 Unexpected end tag (link). Ignored.
-Line: 1 Col: 236 This element (param) has no end tag.
-Line: 1 Col: 241 This element (hr) has no end tag.
-Line: 1 Col: 249 This element (input) has no end tag.
-Line: 1 Col: 255 Unexpected end tag (col). Ignored.
-Line: 1 Col: 262 Unexpected end tag (base). Ignored.
-Line: 1 Col: 269 Unexpected end tag (meta). Ignored.
-Line: 1 Col: 280 This element (basefont) has no end tag.
-Line: 1 Col: 290 This element (bgsound) has no end tag.
-Line: 1 Col: 298 This element (embed) has no end tag.
-Line: 1 Col: 307 This element (spacer) has no end tag.
-Line: 1 Col: 311 Unexpected end tag (p). Ignored.
-Line: 1 Col: 316 End tag (dd) seen too early. Expected other end tag.
-Line: 1 Col: 321 End tag (dt) seen too early. Expected other end tag.
-Line: 1 Col: 331 Unexpected end tag (caption). Ignored.
-Line: 1 Col: 342 Unexpected end tag (colgroup). Ignored.
-Line: 1 Col: 350 Unexpected end tag (tbody). Ignored.
-Line: 1 Col: 358 Unexpected end tag (tfoot). Ignored.
-Line: 1 Col: 366 Unexpected end tag (thead). Ignored.
-Line: 1 Col: 376 End tag (address) seen too early. Expected other end tag.
-Line: 1 Col: 389 End tag (blockquote) seen too early. Expected other end tag.
-Line: 1 Col: 398 End tag (center) seen too early. Expected other end tag.
-Line: 1 Col: 404 Unexpected end tag (dir). Ignored.
-Line: 1 Col: 410 End tag (div) seen too early. Expected other end tag.
-Line: 1 Col: 415 End tag (dl) seen too early. Expected other end tag.
-Line: 1 Col: 426 End tag (fieldset) seen too early. Expected other end tag.
-Line: 1 Col: 436 End tag (listing) seen too early. Expected other end tag.
-Line: 1 Col: 443 End tag (menu) seen too early. Expected other end tag.
-Line: 1 Col: 448 End tag (ol) seen too early. Expected other end tag.
-Line: 1 Col: 453 End tag (ul) seen too early. Expected other end tag.
-Line: 1 Col: 458 End tag (li) seen too early. Expected other end tag.
-Line: 1 Col: 465 End tag (nobr) violates step 1, paragraph 1 of the adoption agency algorithm.
-Line: 1 Col: 471 This element (wbr) has no end tag.
-Line: 1 Col: 487 End tag (button) seen too early. Expected other end tag.
-Line: 1 Col: 497 End tag (marquee) seen too early. Expected other end tag.
-Line: 1 Col: 506 End tag (object) seen too early. Expected other end tag.
-Line: 1 Col: 524 Unexpected end tag (html). Ignored.
-Line: 1 Col: 524 Unexpected end tag (frameset). Ignored.
-Line: 1 Col: 531 Unexpected end tag (head). Ignored.
-Line: 1 Col: 540 Unexpected end tag (iframe). Ignored.
-Line: 1 Col: 548 This element (image) has no end tag.
-Line: 1 Col: 558 This element (isindex) has no end tag.
-Line: 1 Col: 568 Unexpected end tag (noembed). Ignored.
-Line: 1 Col: 579 Unexpected end tag (noframes). Ignored.
-Line: 1 Col: 590 Unexpected end tag (noscript). Ignored.
-Line: 1 Col: 601 Unexpected end tag (optgroup). Ignored.
-Line: 1 Col: 610 Unexpected end tag (option). Ignored.
-Line: 1 Col: 622 Unexpected end tag (plaintext). Ignored.
-Line: 1 Col: 633 Unexpected end tag (textarea). Ignored.
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-Line: 1 Col: 10 Unexpected start tag (frameset). Expected DOCTYPE.
-Line: 1 Col: 10 Expected closing tag. Unexpected end of file.
-#document
-|
-|
-|
diff --git a/html5lib/tests/testdata/tree-construction/tests10.dat b/html5lib/tests/testdata/tree-construction/tests10.dat
deleted file mode 100644
index 4f8df86f..00000000
--- a/html5lib/tests/testdata/tree-construction/tests10.dat
+++ /dev/null
@@ -1,799 +0,0 @@
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-29: Bogus comment
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-35: Stray “svg†start tag.
-42: Stray end tag “svgâ€
-#document
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-43: Stray “svg†start tag.
-50: Stray end tag “svgâ€
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-34: Start tag “svg†seen in “tableâ€.
-41: Stray end tag “svgâ€.
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-34: Start tag “svg†seen in “tableâ€.
-46: Stray end tag “gâ€.
-53: Stray end tag “svgâ€.
-#document
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-
-#data
-
-#errors
-34: Start tag “svg†seen in “tableâ€.
-46: Stray end tag “gâ€.
-58: Stray end tag “gâ€.
-65: Stray end tag “svgâ€.
-#document
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-|
-
-#data
-
-#errors
-41: Start tag “svg†seen in “tableâ€.
-53: Stray end tag “gâ€.
-65: Stray end tag “gâ€.
-72: Stray end tag “svgâ€.
-#document
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-|
-|
-
-#data
-
-#errors
-45: Start tag “svg†seen in “tableâ€.
-57: Stray end tag “gâ€.
-69: Stray end tag “gâ€.
-76: Stray end tag “svgâ€.
-#document
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-|
-| "baz"
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-|
-| "baz"
-
-#data
-
quux
-#errors
-70: HTML start tag “p†in a foreign namespace context.
-81: “table†closed but “caption†was still open.
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-|
-| "baz"
-|
-| "quux"
-
-#data
-
quux
-#errors
-78: “table†closed but “caption†was still open.
-78: Unclosed elements on stack.
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-| "baz"
-|
-| "quux"
-
-#data
-
quux
-#errors
-44: Start tag “svg†seen in “tableâ€.
-56: Stray end tag “gâ€.
-68: Stray end tag “gâ€.
-71: HTML start tag “p†in a foreign namespace context.
-71: Start tag “p†seen in “tableâ€.
-#document
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-|
-| "baz"
-|
-|
-|
-| "quux"
-
-#data
-
quux
-#errors
-50: Stray “svg†start tag.
-54: Stray “g†start tag.
-62: Stray end tag “gâ€
-66: Stray “g†start tag.
-74: Stray end tag “gâ€
-77: Stray “p†start tag.
-88: “table†end tag with “select†open.
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-|
-| "foobarbaz"
-|
-| "quux"
-
-#data
-
quux
-#errors
-36: Start tag “select†seen in “tableâ€.
-42: Stray “svg†start tag.
-46: Stray “g†start tag.
-54: Stray end tag “gâ€
-58: Stray “g†start tag.
-66: Stray end tag “gâ€
-69: Stray “p†start tag.
-80: “table†end tag with “select†open.
-#document
-|
-|
-|
-|
-|
-| "foobarbaz"
-|
-|
-| "quux"
-
-#data
-
foo bar baz
-#errors
-41: Stray “svg†start tag.
-68: HTML start tag “p†in a foreign namespace context.
-#document
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-|
-| "baz"
-
-#data
-
foo bar baz
-#errors
-34: Stray “svg†start tag.
-61: HTML start tag “p†in a foreign namespace context.
-#document
-|
-|
-|
-|
-|
-|
-| "foo"
-|
-| "bar"
-|
-| "baz"
-
-#data
-
-#errors
-31: Stray “svg†start tag.
-35: Stray “g†start tag.
-40: Stray end tag “gâ€
-44: Stray “g†start tag.
-49: Stray end tag “gâ€
-52: Stray “p†start tag.
-58: Stray “span†start tag.
-58: End of file seen and there were open elements.
-#document
-|
-|
-|
-|
-
-#data
-
-#errors
-42: Stray “svg†start tag.
-46: Stray “g†start tag.
-51: Stray end tag “gâ€
-55: Stray “g†start tag.
-60: Stray end tag “gâ€
-63: Stray “p†start tag.
-69: Stray “span†start tag.
-#document
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-| xlink:href="foo"
-|
-| xlink href="foo"
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-| xlink:href="foo"
-| xml:lang="en"
-|
-|
-| xlink href="foo"
-| xml lang="en"
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-| xlink:href="foo"
-| xml:lang="en"
-|
-|
-| xlink href="foo"
-| xml lang="en"
-
-#data
- bar
-#errors
-#document
-|
-|
-|
-|
-| xlink:href="foo"
-| xml:lang="en"
-|
-|
-| xlink href="foo"
-| xml lang="en"
-| "bar"
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-
-#data
-
a
-#errors
-#document
-|
-|
-|
-|
-|
-| "a"
-
-#data
-a
-#errors
-#document
-|
-|
-|
-|
-|
-|
-| "a"
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-
-#data
-a
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-| "a"
-
-#data
-a
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-| "a"
-
-#data
-a
-#errors
-40: HTML start tag “ul†in a foreign namespace context.
-41: End of file in a foreign namespace context.
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-|
-| "a"
-
-#data
-a
-#errors
-35: HTML start tag “ul†in a foreign namespace context.
-36: End of file in a foreign namespace context.
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-| "a"
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-
-#data
-
-#errors
-#document
-|
-|
-|
-|
-|
-|
-|
-
-#data
-