mirror of
https://github.com/rembo10/headphones.git
synced 2026-05-03 18:29:32 +01:00
Include html5lib for BeautifulSoup
BeautifulSoup needs lxml or html5, have included html5lib. Also latest BeautifulSoup 4.1.3
This commit is contained in:
76
html5lib/tests/test_sanitizer.py
Normal file
76
html5lib/tests/test_sanitizer.py
Normal file
@@ -0,0 +1,76 @@
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
|
||||
try:
|
||||
import json
|
||||
except ImportError:
|
||||
import simplejson as json
|
||||
|
||||
from html5lib import html5parser, sanitizer, constants
|
||||
|
||||
def runSanitizerTest(name, expected, input):
|
||||
expected = ''.join([token.toxml() for token in html5parser.HTMLParser().
|
||||
parseFragment(expected).childNodes])
|
||||
expected = json.loads(json.dumps(expected))
|
||||
assert expected == sanitize_html(input)
|
||||
|
||||
def sanitize_html(stream):
|
||||
return ''.join([token.toxml() for token in
|
||||
html5parser.HTMLParser(tokenizer=sanitizer.HTMLSanitizer).
|
||||
parseFragment(stream).childNodes])
|
||||
|
||||
def test_should_handle_astral_plane_characters():
|
||||
assert u"<p>\U0001d4b5 \U0001d538</p>" == sanitize_html("<p>𝒵 𝔸</p>")
|
||||
|
||||
def test_sanitizer():
|
||||
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
|
||||
if tag_name in ['caption', 'col', 'colgroup', 'optgroup', 'option', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr']:
|
||||
continue ### TODO
|
||||
if tag_name != tag_name.lower():
|
||||
continue ### TODO
|
||||
if tag_name == 'image':
|
||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
||||
"<img title=\"1\"/>foo <bad>bar</bad> baz",
|
||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
|
||||
elif tag_name == 'br':
|
||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
||||
"<br title=\"1\"/>foo <bad>bar</bad> baz<br/>",
|
||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
|
||||
elif tag_name in constants.voidElements:
|
||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
||||
"<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name,
|
||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
|
||||
else:
|
||||
yield (runSanitizerTest, "test_should_allow_%s_tag" % tag_name,
|
||||
"<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name),
|
||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
|
||||
|
||||
for tag_name in sanitizer.HTMLSanitizer.allowed_elements:
|
||||
tag_name = tag_name.upper()
|
||||
yield (runSanitizerTest, "test_should_forbid_%s_tag" % tag_name,
|
||||
"<%s title=\"1\">foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name),
|
||||
"<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name,tag_name))
|
||||
|
||||
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
|
||||
if attribute_name != attribute_name.lower(): continue ### TODO
|
||||
if attribute_name == 'style': continue
|
||||
yield (runSanitizerTest, "test_should_allow_%s_attribute" % attribute_name,
|
||||
"<p %s=\"foo\">foo <bad>bar</bad> baz</p>" % attribute_name,
|
||||
"<p %s='foo'>foo <bad>bar</bad> baz</p>" % attribute_name)
|
||||
|
||||
for attribute_name in sanitizer.HTMLSanitizer.allowed_attributes:
|
||||
attribute_name = attribute_name.upper()
|
||||
yield (runSanitizerTest, "test_should_forbid_%s_attribute" % attribute_name,
|
||||
"<p>foo <bad>bar</bad> baz</p>",
|
||||
"<p %s='display: none;'>foo <bad>bar</bad> baz</p>" % attribute_name)
|
||||
|
||||
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
|
||||
yield (runSanitizerTest, "test_should_allow_%s_uris" % protocol,
|
||||
"<a href=\"%s\">foo</a>" % protocol,
|
||||
"""<a href="%s">foo</a>""" % protocol)
|
||||
|
||||
for protocol in sanitizer.HTMLSanitizer.allowed_protocols:
|
||||
yield (runSanitizerTest, "test_should_allow_uppercase_%s_uris" % protocol,
|
||||
"<a href=\"%s\">foo</a>" % protocol,
|
||||
"""<a href="%s">foo</a>""" % protocol)
|
||||
Reference in New Issue
Block a user