mirror of
https://github.com/rembo10/headphones.git
synced 2026-05-02 17:59:28 +01:00
Include html5lib for BeautifulSoup
BeautifulSoup needs lxml or html5, have included html5lib. Also latest BeautifulSoup 4.1.3
This commit is contained in:
127
html5lib/tests/support.py
Normal file
127
html5lib/tests/support.py
Normal file
@@ -0,0 +1,127 @@
|
||||
import os
|
||||
import sys
|
||||
import codecs
|
||||
import glob
|
||||
|
||||
# Directory that contains this module; the test data is located relative to it.
base_path = os.path.split(__file__)[0]

# NOTE(review): the copy of this file under review had lost its indentation;
# the assert and the sys.path tweak are assumed to belong to the development
# branch below -- confirm against upstream html5lib.
if os.path.exists(os.path.join(base_path, 'testdata')):
    # Release layout: a 'testdata' directory sits next to this module.
    test_dir = os.path.join(base_path, 'testdata')
else:
    # Development layout: look for 'testdata' three directory levels above
    # this module.
    test_dir = os.path.abspath(
        os.path.join(base_path,
                     os.path.pardir, os.path.pardir,
                     os.path.pardir, 'testdata'))
    assert os.path.exists(test_dir), "Test data not found"
    # Import the development html5lib: the directory two levels above this
    # module is put first on sys.path so the imports below resolve to it.
    sys.path.insert(0, os.path.abspath(os.path.join(base_path,
                                                    os.path.pardir,
                                                    os.path.pardir)))

import html5lib
from html5lib import html5parser, treebuilders
# base_path was only needed for the path setup above; drop it from the
# module namespace.
del base_path
|
||||
|
||||
# Build a dict of available trees: maps a tree name to the object returned
# by treebuilders.getTreeBuilder for that tree type.
treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
             "DOM":treebuilders.getTreeBuilder("dom")}

# Try whatever etree implementations are available from a list that are
# "supposed" to work. Each implementation is optional: when its import
# fails, the corresponding treeTypes entry is simply left out.
try:
    # Prefer xml.etree.ElementTree ...
    import xml.etree.ElementTree as ElementTree
    treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
    try:
        # ... and fall back to a top-level 'elementtree' package.
        import elementtree.ElementTree as ElementTree
        treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
    except ImportError:
        pass

try:
    # Same fallback scheme for cElementTree.
    import xml.etree.cElementTree as cElementTree
    treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
    try:
        import cElementTree
        treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
    except ImportError:
        pass

try:
    # lxml is registered through the same "etree" tree builder.
    import lxml.etree as lxml
    treeTypes['lxml'] = treebuilders.getTreeBuilder("etree", lxml, fullTree=True)
except ImportError:
    pass

try:
    # The import only checks that BeautifulSoup is installed; the imported
    # module itself is not passed to getTreeBuilder.
    import BeautifulSoup
    treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
except ImportError:
    pass
|
||||
|
||||
def html5lib_test_files(subdirectory, files='*.dat'):
    """Return the paths inside ``test_dir/subdirectory`` matching ``files``.

    ``files`` is a glob pattern; the result is whatever ``glob.glob``
    returns for the combined path.
    """
    pattern = os.path.join(test_dir, subdirectory, files)
    return glob.glob(pattern)
|
||||
|
||||
class DefaultDict(dict):
    """A dict whose item lookup falls back to a fixed value.

    ``d[key]`` returns the stored value when ``key`` is present and
    ``default`` otherwise. A failed lookup does not insert the key.
    Any further positional and keyword arguments are handed to ``dict``.
    """

    def __init__(self, default, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        self.default = default

    def __getitem__(self, key):
        if key in self:
            return dict.__getitem__(self, key)
        return self.default
|
||||
|
||||
class TestData(object):
    """Iterate over the test cases stored in a sectioned test-data file.

    A line starting with "#" opens a section named by the rest of that
    line; the following lines are that section's content. Each test case
    is yielded as a DefaultDict (missing sections read as None) mapping
    section names to their text.
    """

    def __init__(self, filename, newTestHeading="data"):
        """Open ``filename`` as UTF-8.

        ``newTestHeading`` is the section name that marks the start of a
        new test case.
        """
        self.f = codecs.open(filename, encoding="utf8")
        self.newTestHeading = newTestHeading

    def __iter__(self):
        """Yield one mapping per test case found in the file.

        The file is closed once it has been read to the end, so the handle
        is not leaked. Iterating again afterwards yields nothing, exactly
        as iterating an exhausted file did.
        """
        if self.f.closed:
            return
        data = DefaultDict(None)
        key = None
        for line in self.f:
            heading = self.isSectionHeading(line)
            if heading:
                if data and heading == self.newTestHeading:
                    # Remove trailing newline (normaliseOutput strips one
                    # more, so two trailing newlines go in total here)
                    data[key] = data[key][:-1]
                    yield self.normaliseOutput(data)
                    data = DefaultDict(None)
                key = heading
                data[key] = ""
            elif key is not None:
                # Lines before the first heading are ignored
                data[key] += line
        # Everything has been consumed; release the file handle
        self.f.close()
        if data:
            yield self.normaliseOutput(data)

    def isSectionHeading(self, line):
        """If the current line is a test section heading return the heading,
        otherwise return False.

        The heading is the text after the leading "#" with surrounding
        whitespace stripped; a bare "#" therefore gives an empty (falsy)
        string.
        """
        if line.startswith("#"):
            return line[1:].strip()
        else:
            return False

    def normaliseOutput(self, data):
        """Strip one trailing newline from every value of ``data``.

        ``data`` is modified in place and also returned.
        """
        # Iterate over a snapshot: items() exists on both Python 2 and 3
        # (iteritems() is Python 2 only) and the copy keeps the loop
        # independent of the assignments made inside it.
        for key, value in list(data.items()):
            if value.endswith("\n"):
                data[key] = value[:-1]
        return data
|
||||
|
||||
def convert(stripChars):
    """Build a converter that drops the first ``stripChars`` characters
    of every line that starts with "|"."""
    def convertData(data):
        """Convert the output of str(document) to the format used in the
        testcases: lines starting with "|" lose their leading
        ``stripChars`` characters, all other lines pass through as is."""
        return "\n".join(
            line[stripChars:] if line.startswith("|") else line
            for line in data.split("\n"))
    return convertData
|
||||
|
||||
# Converter that removes the first two characters from every line that
# starts with "|" and leaves all other lines unchanged.
convertExpected = convert(2)
|
||||
Reference in New Issue
Block a user