Files
headphones/html5lib/filters/whitespace.py
Ade b8c5782765 Include html5lib for BeautifulSoup
BeautifulSoup needs lxml or html5, have included html5lib.

Also latest BeautifulSoup 4.1.3
2012-09-06 10:47:07 +12:00

42 lines
1.2 KiB
Python

try:
frozenset
except NameError:
# Import from the sets module for python 2.3
from sets import ImmutableSet as frozenset
import re
import _base
from html5lib.constants import rcdataElements, spaceCharacters
spaceCharacters = u"".join(spaceCharacters)
SPACES_REGEX = re.compile(u"[%s]+" % spaceCharacters)
class Filter(_base.Filter):
spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))
def __iter__(self):
preserve = 0
for token in _base.Filter.__iter__(self):
type = token["type"]
if type == "StartTag" \
and (preserve or token["name"] in self.spacePreserveElements):
preserve += 1
elif type == "EndTag" and preserve:
preserve -= 1
elif not preserve and type == "SpaceCharacters" and token["data"]:
# Test on token["data"] above to not introduce spaces where there were not
token["data"] = u" "
elif not preserve and type == "Characters":
token["data"] = collapse_spaces(token["data"])
yield token
def collapse_spaces(text):
return SPACES_REGEX.sub(' ', text)