mirror of
https://github.com/rembo10/headphones.git
synced 2026-03-21 20:29:27 +00:00
- Delete from Releases when deleting artist/album - Searcher - Size limits not quite working - Searcher - 1st newznab used even if disabled - Rutracker search stopped working for me, fixed by updating Beautiful Soup. Moved bs4 and html5lib to lib and ensured (I think) it’s imported from the right place
370 lines
14 KiB
Python
370 lines
14 KiB
Python
"""Module for supporting the lxml.etree library. The idea here is to use as much
|
|
of the native library as possible, without using fragile hacks like custom element
|
|
names that break between releases. The downside of this is that we cannot represent
|
|
all possible trees; specifically the following are known to cause problems:
|
|
|
|
Text or comments as siblings of the root element
|
|
Doctypes with no name
|
|
|
|
When any of these things occur, we emit a DataLossWarning
|
|
"""
|
|
|
|
from __future__ import absolute_import, division, unicode_literals
|
|
|
|
import warnings
|
|
import re
|
|
import sys
|
|
|
|
from . import _base
|
|
from ..constants import DataLossWarning
|
|
from .. import constants
|
|
from . import etree as etree_builders
|
|
from .. import ihatexml
|
|
|
|
import lxml.etree as etree
|
|
|
|
|
|
# Module-level switch read by TreeBuilder.getDocument(): when true the whole
# ElementTree is handed back, otherwise only its root element.
fullTree = True

# Splits a "{namespace}local" name into its (namespace, local) parts.
tag_regexp = re.compile("{([^}]*)}(.*)")

# Comment nodes are recognised by comparing their ``tag`` with the tag of a
# throwaway comment created once here.
comment_type = etree.Comment("asd").tag
|
|
|
|
|
|
class DocumentType(object):
    """Plain record holding the three fields of a DOCTYPE declaration.

    The values are stored exactly as given; nothing is validated or coerced
    here.
    """

    def __init__(self, name, publicId, systemId):
        self.name, self.publicId, self.systemId = name, publicId, systemId
|
|
|
|
|
|
class Document(object):
    """Document wrapper around an lxml element tree.

    ``_elementTree`` starts out as ``None`` and has to be assigned by the
    owner before ``appendChild`` can be used. ``_childNodes`` is a plain list
    exposed read-only through the ``childNodes`` property.
    """

    def __init__(self):
        self._childNodes = []
        self._elementTree = None

    def _getChildNodes(self):
        """Return the list of child wrappers (the live list, not a copy)."""
        return self._childNodes

    childNodes = property(_getChildNodes)

    def appendChild(self, element):
        """Attach ``element._element`` as the next sibling of the tree's root.

        Only the underlying tree is modified; ``_childNodes`` is not touched.
        """
        root = self._elementTree.getroot()
        root.addnext(element._element)
|
|
|
|
|
|
def testSerializer(element):
    """Render ``element`` as an indented, ``|``-prefixed tree dump.

    ``element`` may be:

    * an object with ``getroot`` (a full tree): emits ``#document``, the
      doctype if the tree has an internal DTD, then every top-level sibling
      of the root in document order;
    * a string: emitted as quoted text;
    * any other object without a ``tag`` attribute: treated as a fragment and
      iterated, emitting ``#document-fragment`` followed by each item;
    * a comment or element node: emitted with its attributes, text, children
      and tail.

    Returns the dump as a single string with one node per line.

    Raises ``KeyError`` if an element or attribute uses a namespace that is
    not a key of ``constants.prefixes``.
    """
    rv = []
    infosetFilter = ihatexml.InfosetFilter()

    def appendText(text, indent):
        # All text nodes (text, tail, fragment strings) share this format.
        rv.append("|%s\"%s\"" % (' ' * indent, text))

    def serializeElement(element, indent=0):
        if not hasattr(element, "tag"):
            if hasattr(element, "getroot"):
                # Full tree case
                rv.append("#document")
                docinfo = element.docinfo
                if docinfo.internalDTD:
                    if not (docinfo.public_id or docinfo.system_url):
                        dtd_str = "<!DOCTYPE %s>" % docinfo.root_name
                    else:
                        dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
                            docinfo.root_name,
                            docinfo.public_id,
                            docinfo.system_url)
                    rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
                # Rewind to the first top-level node (e.g. a comment before
                # the root) and then walk forward over all siblings.
                next_element = element.getroot()
                while next_element.getprevious() is not None:
                    next_element = next_element.getprevious()
                while next_element is not None:
                    serializeElement(next_element, indent + 2)
                    next_element = next_element.getnext()
            elif isinstance(element, (str, bytes)):
                # Text in a fragment
                assert isinstance(element, str) or sys.version_info.major == 2
                appendText(element, indent)
            else:
                # Fragment case
                rv.append("#document-fragment")
                for next_element in element:
                    serializeElement(next_element, indent + 2)
        elif element.tag == comment_type:
            rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
            if hasattr(element, "tail") and element.tail:
                appendText(element.tail, indent)
        else:
            assert isinstance(element, etree._Element)
            nsmatch = etree_builders.tag_regexp.match(element.tag)
            if nsmatch is not None:
                ns = nsmatch.group(1)
                tag = nsmatch.group(2)
                prefix = constants.prefixes[ns]
                rv.append("|%s<%s %s>" % (' ' * indent, prefix,
                                          infosetFilter.fromXmlName(tag)))
            else:
                rv.append("|%s<%s>" % (' ' * indent,
                                       infosetFilter.fromXmlName(element.tag)))

            child_indent = indent + 2

            if hasattr(element, "attrib"):
                attributes = []
                for name, value in element.attrib.items():
                    nsmatch = tag_regexp.match(name)
                    if nsmatch is not None:
                        ns, name = nsmatch.groups()
                        name = infosetFilter.fromXmlName(name)
                        prefix = constants.prefixes[ns]
                        attr_string = "%s %s" % (prefix, name)
                    else:
                        attr_string = infosetFilter.fromXmlName(name)
                    attributes.append((attr_string, value))

                # Sorted so the dump does not depend on attribute order.
                for name, value in sorted(attributes):
                    rv.append('|%s%s="%s"' % (' ' * child_indent, name, value))

            if element.text:
                appendText(element.text, child_indent)
            for child in element:
                serializeElement(child, child_indent)
            # The tail belongs to the parent's content, so it is written at
            # the element's own indentation level.
            if hasattr(element, "tail") and element.tail:
                appendText(element.tail, indent)

    serializeElement(element, 0)

    return "\n".join(rv)
|
|
|
|
|
|
def tostring(element):
    """Serialize an element and its child nodes to a string.

    ``element`` is either a tree object (anything without a ``tag``
    attribute, which must then provide ``docinfo`` and ``getroot()``), a
    comment node or an ordinary element node. For a tree, the doctype is
    written first when the tree has an internal DTD, followed by the root
    element.

    Text and attribute values are written verbatim; no escaping is applied.

    Returns the concatenated markup as a single string.
    """
    rv = []

    def serializeElement(element):
        if not hasattr(element, "tag"):
            # Tree object: optional doctype, then the root element.
            if element.docinfo.internalDTD:
                if element.docinfo.doctype:
                    dtd_str = element.docinfo.doctype
                else:
                    dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                rv.append(dtd_str)
            serializeElement(element.getroot())

        elif element.tag == comment_type:
            rv.append("<!--%s-->" % (element.text,))

        else:
            # This is assumed to be an ordinary element
            if not element.attrib:
                rv.append("<%s>" % (element.tag,))
            else:
                attr = " ".join(["%s=\"%s\"" % (name, value)
                                 for name, value in element.attrib.items()])
                rv.append("<%s %s>" % (element.tag, attr))
            if element.text:
                rv.append(element.text)

            for child in element:
                serializeElement(child)

            rv.append("</%s>" % (element.tag,))

        # Tree objects have no ``tail``; nodes may carry trailing text.
        if hasattr(element, "tail") and element.tail:
            rv.append(element.tail)

    serializeElement(element)

    return "".join(rv)
|
|
|
|
|
|
class TreeBuilder(_base.TreeBuilder):
    """Tree builder that produces an lxml.etree tree.

    Names, attribute names, character data and comment data are passed
    through an ``ihatexml.InfosetFilter`` before they reach lxml. The
    document itself is bootstrapped in ``insertRoot`` by parsing a small
    generated string, so that a doctype can be attached to the tree.
    """

    documentClass = Document
    doctypeClass = DocumentType
    elementClass = None
    commentClass = None
    fragmentClass = Document
    implementation = etree

    def __init__(self, namespaceHTMLElements, fullTree=False):
        """Create the builder and its element/comment wrapper classes.

        ``fullTree`` is forwarded to ``etree_builders.getETreeModule``; it is
        not stored on the instance.
        """
        builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
        self.namespaceHTMLElements = namespaceHTMLElements

        def coerceKey(key):
            # Tuple keys are written as "{key[2]}coerced(key[1])"; any other
            # key is treated as a plain attribute name.
            if isinstance(key, tuple):
                return "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
            return infosetFilter.coerceAttribute(key)

        class Attributes(dict):
            """Dict that mirrors every item into the wrapped lxml element."""

            def __init__(self, element, value=None):
                self._element = element
                # ``None`` instead of a shared ``{}`` default argument.
                dict.__init__(self, {} if value is None else value)
                for key, item in self.items():
                    self._element._element.attrib[coerceKey(key)] = item

            def __setitem__(self, key, value):
                dict.__setitem__(self, key, value)
                self._element._element.attrib[coerceKey(key)] = value

        class Element(builder.Element):
            """Element wrapper that coerces names and character data."""

            def __init__(self, name, namespace):
                name = infosetFilter.coerceElement(name)
                builder.Element.__init__(self, name, namespace=namespace)
                self._attributes = Attributes(self)

            def _setName(self, name):
                self._name = infosetFilter.coerceElement(name)
                self._element.tag = self._getETreeTag(
                    self._name, self._namespace)

            def _getName(self):
                return infosetFilter.fromXmlName(self._name)

            name = property(_getName, _setName)

            def _getAttributes(self):
                return self._attributes

            def _setAttributes(self, attributes):
                self._attributes = Attributes(self, attributes)

            attributes = property(_getAttributes, _setAttributes)

            def insertText(self, data, insertBefore=None):
                data = infosetFilter.coerceCharacters(data)
                builder.Element.insertText(self, data, insertBefore)

            def appendChild(self, child):
                builder.Element.appendChild(self, child)

        class Comment(builder.Comment):
            """Comment wrapper that coerces its data."""

            def __init__(self, data):
                data = infosetFilter.coerceComment(data)
                builder.Comment.__init__(self, data)

            def _setData(self, data):
                data = infosetFilter.coerceComment(data)
                self._element.text = data

            def _getData(self):
                return self._element.text

            data = property(_getData, _setData)

        self.elementClass = Element
        # Use the coercing subclass so comment data goes through
        # ``coerceComment`` before it is handed to lxml.
        self.commentClass = Comment
        # self.fragmentClass = builder.DocumentFragment
        _base.TreeBuilder.__init__(self, namespaceHTMLElements)

    def reset(self):
        """Reset builder state; comments are buffered until the root exists."""
        _base.TreeBuilder.reset(self)
        self.insertComment = self.insertCommentInitial
        self.initial_comments = []
        self.doctype = None

    def testSerializer(self, element):
        """Return the module-level ``testSerializer`` dump of ``element``."""
        return testSerializer(element)

    def getDocument(self):
        """Return the element tree, or only its root element.

        The choice is made by the module-level ``fullTree`` flag, not by the
        ``fullTree`` argument given to the constructor.
        """
        if fullTree:
            return self.document._elementTree
        else:
            return self.document._elementTree.getroot()

    def getFragment(self):
        """Return the content of the first open element as a flat list.

        The list holds the element's text (if any), its child nodes, and its
        tail text (if any), in that order.
        """
        fragment = []
        element = self.openElements[0]._element
        if element.text:
            fragment.append(element.text)
        fragment.extend(list(element))
        if element.tail:
            fragment.append(element.tail)
        return fragment

    def insertDoctype(self, token):
        """Record the doctype described by ``token`` for later use.

        The doctype is only stored here; ``insertRoot`` writes it into the
        tree. A ``DataLossWarning`` is emitted when the name is empty (no
        doctype is kept) or when the name has to be coerced.
        """
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]

        if not name:
            warnings.warn("lxml cannot represent empty doctype", DataLossWarning)
            self.doctype = None
        else:
            coercedName = self.infosetFilter.coerceElement(name)
            if coercedName != name:
                warnings.warn("lxml cannot represent non-xml doctype", DataLossWarning)

            doctype = self.doctypeClass(coercedName, publicId, systemId)
            self.doctype = doctype

    def insertCommentInitial(self, data, parent=None):
        """Buffer a comment seen before the root element exists."""
        self.initial_comments.append(data)

    def insertCommentMain(self, data, parent=None):
        """Insert a comment once the root element exists.

        Warns with ``DataLossWarning`` when the comment targets the document
        and the root element's last child is itself a comment.
        """
        if (parent == self.document and
                self.document._elementTree.getroot()[-1].tag == comment_type):
            warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
        super(TreeBuilder, self).insertComment(data, parent)

    def insertRoot(self, token):
        """Create the document root"""
        # Because of the way libxml2 works, it doesn't seem to be possible to
        # alter information like the doctype after the tree has been parsed.
        # Therefore we need to use the built-in parser to create our initial
        # tree, after which we can add elements like normal
        docStr = ""
        if self.doctype:
            assert self.doctype.name
            docStr += "<!DOCTYPE %s" % self.doctype.name
            if (self.doctype.publicId is not None or
                    self.doctype.systemId is not None):
                docStr += (' PUBLIC "%s" ' %
                           (self.infosetFilter.coercePubid(self.doctype.publicId or "")))
                if self.doctype.systemId:
                    sysid = self.doctype.systemId
                    if "'" in sysid and '"' in sysid:
                        warnings.warn("DOCTYPE system cannot contain single and double quotes", DataLossWarning)
                        sysid = sysid.replace("'", 'U00027')
                    # Quote with whichever quote character the id does not
                    # contain.
                    if "'" in sysid:
                        docStr += '"%s"' % sysid
                    else:
                        docStr += "'%s'" % sysid
                else:
                    docStr += "''"
            docStr += ">"
            if self.doctype.name != token["name"]:
                warnings.warn("lxml cannot represent doctype with a different name to the root element", DataLossWarning)
        # Placeholder root; it is renamed to the real tag further down.
        docStr += "<THIS_SHOULD_NEVER_APPEAR_PUBLICLY/>"
        root = etree.fromstring(docStr)

        # Append the initial comments, coerced the same way as comments
        # created through the comment class.
        for comment_token in self.initial_comments:
            comment_data = self.infosetFilter.coerceComment(comment_token["data"])
            root.addprevious(etree.Comment(comment_data))

        # Create the root document and add the ElementTree to it
        self.document = self.documentClass()
        self.document._elementTree = root.getroottree()

        # Give the root element the right name
        name = token["name"]
        namespace = token.get("namespace", self.defaultNamespace)
        if namespace is None:
            etree_tag = name
        else:
            etree_tag = "{%s}%s" % (namespace, name)
        root.tag = etree_tag

        # Add the root element to the internal child/open data structures
        root_element = self.elementClass(name, namespace)
        root_element._element = root
        self.document._childNodes.append(root_element)
        self.openElements.append(root_element)

        # Reset to the default insert comment function
        self.insertComment = self.insertCommentMain
|